From 88a309465b3f05a100c3b81966982c0f9f5d23a6 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 20 Jan 2022 05:20:06 -0800 Subject: [PATCH 001/694] lib: zstd: clean up double word in comment. Remove the second 'a' and 'into'. Signed-off-by: Tom Rix Signed-off-by: Nick Terrell --- include/linux/zstd_lib.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h index b8c7dbf98390f..6b91758b61af9 100644 --- a/include/linux/zstd_lib.h +++ b/include/linux/zstd_lib.h @@ -1330,7 +1330,7 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, /*! ZSTD_mergeBlockDelimiters() : * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals - * by merging them into into the literals of the next sequence. + * by merging them into the literals of the next sequence. * * As such, the final generated result has no explicit representation of block boundaries, * and the final last literals segment is not represented in the sequences. @@ -1377,7 +1377,7 @@ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size /*! ZSTD_writeSkippableFrame() : * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. * - * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. -- GitLab From f37722ac71cc8b5ab86f4b3c4d9b9388e1315e8b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 12:25:06 +0300 Subject: [PATCH 002/694] phy: stm32: fix an error code in probe If "index > usbphyc->nphys" is true then this returns success but it should return -EINVAL. Fixes: 94c358da3a05 ("phy: stm32: add support for STM32 USB PHY Controller (USBPHYC)") Signed-off-by: Dan Carpenter Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/Y0kq8j6S+5nDdMpr@kili Signed-off-by: Vinod Koul --- drivers/phy/st/phy-stm32-usbphyc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index a98c911cc37ae..5bb9647b078f1 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -710,6 +710,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev) ret = of_property_read_u32(child, "reg", &index); if (ret || index > usbphyc->nphys) { dev_err(&phy->dev, "invalid reg property: %d\n", ret); + if (!ret) + ret = -EINVAL; goto put_child; } -- GitLab From cbdbe312c9b6f9dbf698c3db1a5bec4140fe1c21 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sat, 15 Oct 2022 13:11:22 -0700 Subject: [PATCH 003/694] dt-bindings: phy-j721e-wiz: add j784s4 compatible string Add ti,j784s4-wiz-10g compatible string to binding documentation. 
Signed-off-by: Matt Ranostay Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221015201123.195477-2-mranostay@ti.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml index 2225925b6dad8..a9e38739c0100 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml @@ -17,6 +17,7 @@ properties: - ti,j721e-wiz-10g - ti,am64-wiz-10g - ti,j7200-wiz-10g + - ti,j784s4-wiz-10g power-domains: maxItems: 1 -- GitLab From e27ecef8a8ccc13c54df54f5d100aa608de4c306 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sat, 15 Oct 2022 13:11:23 -0700 Subject: [PATCH 004/694] phy: ti: phy-j721e-wiz: add j784s4-wiz-10g module support Add support for j784s4-wiz-10g device which has two core reference clocks (e.g core_ref_clk, core_ref1_clk) which requires an additional mux selection option. Acked-by: Roger Quadros Signed-off-by: Matt Ranostay Link: https://lore.kernel.org/r/20221015201123.195477-3-mranostay@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-j721e-wiz.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index 41725c6bcdf6f..141b51af4427a 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -81,14 +81,20 @@ static const struct reg_field phy_reset_n = REG_FIELD(WIZ_SERDES_RST, 31, 31); static const struct reg_field phy_en_refclk = REG_FIELD(WIZ_SERDES_RST, 30, 30); static const struct reg_field pll1_refclk_mux_sel = REG_FIELD(WIZ_SERDES_RST, 29, 29); +static const struct reg_field pll1_refclk_mux_sel_2 = + REG_FIELD(WIZ_SERDES_RST, 22, 23); static const struct reg_field pll0_refclk_mux_sel = REG_FIELD(WIZ_SERDES_RST, 28, 28); +static const struct reg_field pll0_refclk_mux_sel_2 = + REG_FIELD(WIZ_SERDES_RST, 28, 29); static const struct reg_field refclk_dig_sel_16g = REG_FIELD(WIZ_SERDES_RST, 24, 25); static const struct reg_field refclk_dig_sel_10g = REG_FIELD(WIZ_SERDES_RST, 24, 24); static const struct reg_field pma_cmn_refclk_int_mode = REG_FIELD(WIZ_SERDES_TOP_CTRL, 28, 29); +static const struct reg_field pma_cmn_refclk1_int_mode = + REG_FIELD(WIZ_SERDES_TOP_CTRL, 20, 21); static const struct reg_field pma_cmn_refclk_mode = REG_FIELD(WIZ_SERDES_TOP_CTRL, 30, 31); static const struct reg_field pma_cmn_refclk_dig_div = @@ -315,6 +321,7 @@ enum wiz_type { J721E_WIZ_10G, /* Also for J7200 SR1.0 */ AM64_WIZ_10G, J7200_WIZ_10G, /* J7200 SR2.0 */ + J784S4_WIZ_10G, }; struct wiz_data { @@ -992,6 +999,7 @@ static void wiz_clock_cleanup(struct wiz *wiz, struct device_node *node) switch (wiz->type) { case AM64_WIZ_10G: case J7200_WIZ_10G: + case J784S4_WIZ_10G: of_clk_del_provider(dev->of_node); return; default: @@ -1123,6 +1131,7 @@ static int wiz_clock_init(struct wiz *wiz, struct device_node *node) switch (wiz->type) { case AM64_WIZ_10G: case J7200_WIZ_10G: + case J784S4_WIZ_10G: ret = wiz_clock_register(wiz); if (ret) dev_err(dev, "Failed to register wiz clocks\n"); @@ -1299,6 +1308,16 @@ static struct wiz_data j7200_pg2_10g_data = { .clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G, }; +static struct wiz_data j784s4_10g_data = { + .type = J784S4_WIZ_10G, + .pll0_refclk_mux_sel = &pll0_refclk_mux_sel_2, + .pll1_refclk_mux_sel = &pll1_refclk_mux_sel_2, + .refclk_dig_sel = 
&refclk_dig_sel_16g, + .pma_cmn_refclk1_int_mode = &pma_cmn_refclk1_int_mode, + .clk_mux_sel = clk_mux_sel_10g_2_refclk, + .clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G, +}; + static const struct of_device_id wiz_id_table[] = { { .compatible = "ti,j721e-wiz-16g", .data = &j721e_16g_data, @@ -1312,6 +1331,9 @@ static const struct of_device_id wiz_id_table[] = { { .compatible = "ti,j7200-wiz-10g", .data = &j7200_pg2_10g_data, }, + { + .compatible = "ti,j784s4-wiz-10g", .data = &j784s4_10g_data, + }, {} }; MODULE_DEVICE_TABLE(of, wiz_id_table); -- GitLab From 25caed3dcadacd0443dce4fb820e4a33029bba40 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:46:59 +0800 Subject: [PATCH 005/694] dt-binding: phy: Add i.MX8MP PCIe PHY binding Add i.MX8MP PCIe PHY binding. On i.MX8MM, the initialized default value of PERST bit(BIT3) of SRC_PCIEPHY_RCR is 1b'1. But i.MX8MP has one inversed default value 1b'0 of PERST bit. And the PERST bit should be kept 1b'1 after power and clocks are stable. So add one more PERST explicitly for i.MX8MP PCIe PHY. Signed-off-by: Richard Zhu Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/1665625622-20551-2-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- .../bindings/phy/fsl,imx8-pcie-phy.yaml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml index 0af765ba27932..182a219387b0e 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml @@ -16,6 +16,7 @@ properties: compatible: enum: - fsl,imx8mm-pcie-phy + - fsl,imx8mp-pcie-phy reg: maxItems: 1 @@ -28,11 +29,16 @@ properties: - const: ref resets: - maxItems: 1 + minItems: 1 + maxItems: 2 reset-names: - items: - - const: pciephy + oneOf: + - items: # for iMX8MM + - const: pciephy + - items: # for IMX8MP + - const: pciephy + - const: perst fsl,refclk-pad-mode: description: | @@ -60,6 +66,10 @@ properties: description: A boolean property indicating the CLKREQ# signal is not supported in the board design (optional) + power-domains: + description: PCIe PHY power domain (optional). + maxItems: 1 + required: - "#phy-cells" - compatible -- GitLab From e9e7dca53bf5a5bddf70c87157660a29cdcdd2d8 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:47:00 +0800 Subject: [PATCH 006/694] phy: freescale: imx8m-pcie: Refine register definitions No function changes, refine PHY register definitions. - Keep align with other CMN PHY registers, refine the definitions of PHY_CMN_REG75. - Remove two BIT definitions that are not used at all. 
Signed-off-by: Richard Zhu Signed-off-by: Lucas Stach Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Link: https://lore.kernel.org/r/1665625622-20551-3-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index c93286483b425..3c8c255499cda 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -31,12 +31,10 @@ #define IMX8MM_PCIE_PHY_CMN_REG065 0x194 #define ANA_AUX_RX_TERM (BIT(7) | BIT(4)) #define ANA_AUX_TX_LVL GENMASK(3, 0) -#define IMX8MM_PCIE_PHY_CMN_REG75 0x1D4 -#define PCIE_PHY_CMN_REG75_PLL_DONE 0x3 +#define IMX8MM_PCIE_PHY_CMN_REG075 0x1D4 +#define ANA_PLL_DONE 0x3 #define PCIE_PHY_TRSV_REG5 0x414 -#define PCIE_PHY_TRSV_REG5_GEN1_DEEMP 0x2D #define PCIE_PHY_TRSV_REG6 0x418 -#define PCIE_PHY_TRSV_REG6_GEN2_DEEMP 0xF #define IMX8MM_GPR_PCIE_REF_CLK_SEL GENMASK(25, 24) #define IMX8MM_GPR_PCIE_REF_CLK_PLL FIELD_PREP(IMX8MM_GPR_PCIE_REF_CLK_SEL, 0x3) @@ -131,9 +129,8 @@ static int imx8_pcie_phy_power_on(struct phy *phy) reset_control_deassert(imx8_phy->reset); /* Polling to check the phy is ready or not. */ - ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG75, - val, val == PCIE_PHY_CMN_REG75_PLL_DONE, - 10, 20000); + ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG075, + val, val == ANA_PLL_DONE, 10, 20000); return ret; } -- GitLab From ca679c49c4463595499a053ba94328acb574fffa Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:47:01 +0800 Subject: [PATCH 007/694] phy: freescale: imx8m-pcie: Refine i.MX8MM PCIe PHY driver Refine the i.MX8MM PCIe PHY driver to make it more flexible and easier to extend. - Use the gpr compatible string to avoid code duplication when adding PCIe PHY support for another platform. - Re-arrange the code to make it more flexible and easier to extend. No functional change. Re-arrange the TX tuning, since internal registers can be written through the APB interface before assertion of CMN_RST. 
Signed-off-by: Richard Zhu Signed-off-by: Lucas Stach Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Reviewed-by: Ahmad Fatoum Link: https://lore.kernel.org/r/1665625622-20551-4-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 106 +++++++++++++-------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index 3c8c255499cda..3e494612db3c0 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,15 @@ #define IMX8MM_GPR_PCIE_SSC_EN BIT(16) #define IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE BIT(9) +enum imx8_pcie_phy_type { + IMX8MM, +}; + +struct imx8_pcie_phy_drvdata { + const char *gpr; + enum imx8_pcie_phy_type variant; +}; + struct imx8_pcie_phy { void __iomem *base; struct clk *clk; @@ -55,6 +65,7 @@ struct imx8_pcie_phy { u32 tx_deemph_gen1; u32 tx_deemph_gen2; bool clkreq_unused; + const struct imx8_pcie_phy_drvdata *drvdata; }; static int imx8_pcie_phy_power_on(struct phy *phy) @@ -66,31 +77,17 @@ static int imx8_pcie_phy_power_on(struct phy *phy) reset_control_assert(imx8_phy->reset); pad_mode = imx8_phy->refclk_pad_mode; - /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */ - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE, - imx8_phy->clkreq_unused ? - 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_AUX_EN, - IMX8MM_GPR_PCIE_AUX_EN); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_POWER_OFF, 0); - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_SSC_EN, 0); - - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_REF_CLK_SEL, - pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ? - IMX8MM_GPR_PCIE_REF_CLK_EXT : - IMX8MM_GPR_PCIE_REF_CLK_PLL); - usleep_range(100, 200); - - /* Do the PHY common block reset */ - regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, - IMX8MM_GPR_PCIE_CMN_RST, - IMX8MM_GPR_PCIE_CMN_RST); - usleep_range(200, 500); + switch (imx8_phy->drvdata->variant) { + case IMX8MM: + /* Tune PHY de-emphasis setting to pass PCIe compliance. */ + if (imx8_phy->tx_deemph_gen1) + writel(imx8_phy->tx_deemph_gen1, + imx8_phy->base + PCIE_PHY_TRSV_REG5); + if (imx8_phy->tx_deemph_gen2) + writel(imx8_phy->tx_deemph_gen2, + imx8_phy->base + PCIE_PHY_TRSV_REG6); + break; + } if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT || pad_mode == IMX8_PCIE_REFCLK_PAD_UNUSED) { @@ -118,15 +115,37 @@ static int imx8_pcie_phy_power_on(struct phy *phy) imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG065); } - /* Tune PHY de-emphasis setting to pass PCIe compliance. */ - if (imx8_phy->tx_deemph_gen1) - writel(imx8_phy->tx_deemph_gen1, - imx8_phy->base + PCIE_PHY_TRSV_REG5); - if (imx8_phy->tx_deemph_gen2) - writel(imx8_phy->tx_deemph_gen2, - imx8_phy->base + PCIE_PHY_TRSV_REG6); + /* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */ + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE, + imx8_phy->clkreq_unused ? 
+ 0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_AUX_EN, + IMX8MM_GPR_PCIE_AUX_EN); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_POWER_OFF, 0); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_SSC_EN, 0); - reset_control_deassert(imx8_phy->reset); + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_REF_CLK_SEL, + pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ? + IMX8MM_GPR_PCIE_REF_CLK_EXT : + IMX8MM_GPR_PCIE_REF_CLK_PLL); + usleep_range(100, 200); + + /* Do the PHY common block reset */ + regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14, + IMX8MM_GPR_PCIE_CMN_RST, + IMX8MM_GPR_PCIE_CMN_RST); + + switch (imx8_phy->drvdata->variant) { + case IMX8MM: + reset_control_deassert(imx8_phy->reset); + usleep_range(200, 500); + break; + } /* Polling to check the phy is ready or not. */ ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG075, @@ -157,6 +176,17 @@ static const struct phy_ops imx8_pcie_phy_ops = { .owner = THIS_MODULE, }; +static const struct imx8_pcie_phy_drvdata imx8mm_drvdata = { + .gpr = "fsl,imx8mm-iomuxc-gpr", + .variant = IMX8MM, +}; + +static const struct of_device_id imx8_pcie_phy_of_match[] = { + {.compatible = "fsl,imx8mm-pcie-phy", .data = &imx8mm_drvdata, }, + { }, +}; +MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); + static int imx8_pcie_phy_probe(struct platform_device *pdev) { struct phy_provider *phy_provider; @@ -169,6 +199,8 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) if (!imx8_phy) return -ENOMEM; + imx8_phy->drvdata = of_device_get_match_data(dev); + /* get PHY refclk pad mode */ of_property_read_u32(np, "fsl,refclk-pad-mode", &imx8_phy->refclk_pad_mode); @@ -194,7 +226,7 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) /* Grab GPR config register range */ imx8_phy->iomuxc_gpr = - syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); + syscon_regmap_lookup_by_compatible(imx8_phy->drvdata->gpr); if (IS_ERR(imx8_phy->iomuxc_gpr)) { dev_err(dev, "unable to find iomuxc registers\n"); return PTR_ERR(imx8_phy->iomuxc_gpr); @@ -222,12 +254,6 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(phy_provider); } -static const struct of_device_id imx8_pcie_phy_of_match[] = { - {.compatible = "fsl,imx8mm-pcie-phy",}, - { }, -}; -MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); - static struct platform_driver imx8_pcie_phy_driver = { .probe = imx8_pcie_phy_probe, .driver = { -- GitLab From dce9edff16ee8df20e791e82e0704c4667cc3908 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Thu, 13 Oct 2022 09:47:02 +0800 Subject: [PATCH 008/694] phy: freescale: imx8m-pcie: Add i.MX8MP PCIe PHY support Add i.MX8MP PCIe PHY support. 
Signed-off-by: Richard Zhu Signed-off-by: Lucas Stach Tested-by: Marek Vasut Tested-by: Richard Leitner Tested-by: Alexander Stein Reviewed-by: Lucas Stach Reviewed-by: Ahmad Fatoum Link: https://lore.kernel.org/r/1665625622-20551-5-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 25 ++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index 3e494612db3c0..7585e8080b77d 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -48,6 +48,7 @@ enum imx8_pcie_phy_type { IMX8MM, + IMX8MP, }; struct imx8_pcie_phy_drvdata { @@ -60,6 +61,7 @@ struct imx8_pcie_phy { struct clk *clk; struct phy *phy; struct regmap *iomuxc_gpr; + struct reset_control *perst; struct reset_control *reset; u32 refclk_pad_mode; u32 tx_deemph_gen1; @@ -74,11 +76,11 @@ static int imx8_pcie_phy_power_on(struct phy *phy) u32 val, pad_mode; struct imx8_pcie_phy *imx8_phy = phy_get_drvdata(phy); - reset_control_assert(imx8_phy->reset); - pad_mode = imx8_phy->refclk_pad_mode; switch (imx8_phy->drvdata->variant) { case IMX8MM: + reset_control_assert(imx8_phy->reset); + /* Tune PHY de-emphasis setting to pass PCIe compliance. */ if (imx8_phy->tx_deemph_gen1) writel(imx8_phy->tx_deemph_gen1, @@ -87,6 +89,8 @@ static int imx8_pcie_phy_power_on(struct phy *phy) writel(imx8_phy->tx_deemph_gen2, imx8_phy->base + PCIE_PHY_TRSV_REG6); break; + case IMX8MP: /* Do nothing. */ + break; } if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT || @@ -141,6 +145,9 @@ static int imx8_pcie_phy_power_on(struct phy *phy) IMX8MM_GPR_PCIE_CMN_RST); switch (imx8_phy->drvdata->variant) { + case IMX8MP: + reset_control_deassert(imx8_phy->perst); + fallthrough; case IMX8MM: reset_control_deassert(imx8_phy->reset); usleep_range(200, 500); @@ -181,8 +188,14 @@ static const struct imx8_pcie_phy_drvdata imx8mm_drvdata = { .variant = IMX8MM, }; +static const struct imx8_pcie_phy_drvdata imx8mp_drvdata = { + .gpr = "fsl,imx8mp-iomuxc-gpr", + .variant = IMX8MP, +}; + static const struct of_device_id imx8_pcie_phy_of_match[] = { {.compatible = "fsl,imx8mm-pcie-phy", .data = &imx8mm_drvdata, }, + {.compatible = "fsl,imx8mp-pcie-phy", .data = &imx8mp_drvdata, }, { }, }; MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match); @@ -238,6 +251,14 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev) return PTR_ERR(imx8_phy->reset); } + if (imx8_phy->drvdata->variant == IMX8MP) { + imx8_phy->perst = + devm_reset_control_get_exclusive(dev, "perst"); + if (IS_ERR(imx8_phy->perst)) + dev_err_probe(dev, PTR_ERR(imx8_phy->perst), + "Failed to get PCIE PHY PERST control\n"); + } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); imx8_phy->base = devm_ioremap_resource(dev, res); if (IS_ERR(imx8_phy->base)) -- GitLab From b01d622d76134e9401970ffd3fbbb9a7051f976a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 20 Sep 2022 14:11:54 +0200 Subject: [PATCH 009/694] phy: marvell: phy-mvebu-a3700-comphy: Reset COMPHY registers before USB 3.0 power on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turris MOX board with older ARM Trusted Firmware version v1.5 is not able to detect any USB 3.0 device connected to USB-A port on Mox-A module after commit 0a6fc70d76bd ("phy: marvell: phy-mvebu-a3700-comphy: Remove broken reset support"). 
On the other hand USB 2.0 devices connected to the same USB-A port are working fine. It looks as if the older firmware configures COMPHY registers for USB 3.0 somehow incompatibly for kernel driver. Experiments show that resetting COMPHY registers via setting SFT_RST auto-clearing bit in COMPHY_SFT_RESET register fixes this issue. Reset the COMPHY in mvebu_a3700_comphy_usb3_power_on() function as a first step after selecting COMPHY lane and USB 3.0 function. With this change Turris MOX board can successfully detect USB 3.0 devices again. Before the above mentioned commit this reset was implemented in PHY reset method, so this is the reason why there was no issue with older firmware version then. Fixes: 0a6fc70d76bd ("phy: marvell: phy-mvebu-a3700-comphy: Remove broken reset support") Reported-by: Marek Behún Signed-off-by: Pali Rohár Tested-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20220920121154.30115-1-pali@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 67712c77d806f..d641b345afa35 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -826,6 +826,9 @@ mvebu_a3700_comphy_usb3_power_on(struct mvebu_a3700_comphy_lane *lane) if (ret) return ret; + /* COMPHY register reset (cleared automatically) */ + comphy_lane_reg_set(lane, COMPHY_SFT_RESET, SFT_RST, SFT_RST); + /* * 0. Set PHY OTG Control(0x5d034), bit 4, Power up OTG module The * register belong to UTMI module, so it is set in UTMI phy driver. -- GitLab From 2566ad8ec418934c213cb50fd2084ffd896a2fea Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:02 +0300 Subject: [PATCH 010/694] phy: qcom-qmp-pcie: split register tables into common and extra parts SM8250 configuration tables are split into two parts: the common one and the PHY-specific tables. Make this split more formal. Rather than having a blind renamed copy of all QMP table fields, add separate struct qmp_phy_cfg_tables and add two instances of this structure to the struct qmp_phy_cfg. Later on this will be used to support different PHY modes (RC vs EP). 
Reviewed-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220927092207.161501-2-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 406 +++++++++++++---------- 1 file changed, 222 insertions(+), 184 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 5be5348fbb26b..ae0d7b49dfa36 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1300,31 +1300,30 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e), }; +struct qmp_phy_cfg_tables { + const struct qmp_phy_init_tbl *serdes; + int serdes_num; + const struct qmp_phy_init_tbl *tx; + int tx_num; + const struct qmp_phy_init_tbl *rx; + int rx_num; + const struct qmp_phy_init_tbl *pcs; + int pcs_num; + const struct qmp_phy_init_tbl *pcs_misc; + int pcs_misc_num; +}; + /* struct qmp_phy_cfg - per-PHY initialization config */ struct qmp_phy_cfg { int lanes; - /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ - const struct qmp_phy_init_tbl *serdes_tbl; - int serdes_tbl_num; - const struct qmp_phy_init_tbl *serdes_tbl_sec; - int serdes_tbl_num_sec; - const struct qmp_phy_init_tbl *tx_tbl; - int tx_tbl_num; - const struct qmp_phy_init_tbl *tx_tbl_sec; - int tx_tbl_num_sec; - const struct qmp_phy_init_tbl *rx_tbl; - int rx_tbl_num; - const struct qmp_phy_init_tbl *rx_tbl_sec; - int rx_tbl_num_sec; - const struct qmp_phy_init_tbl *pcs_tbl; - int pcs_tbl_num; - const struct qmp_phy_init_tbl *pcs_tbl_sec; - int pcs_tbl_num_sec; - const struct qmp_phy_init_tbl *pcs_misc_tbl; - int pcs_misc_tbl_num; - const struct qmp_phy_init_tbl *pcs_misc_tbl_sec; - int pcs_misc_tbl_num_sec; + /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */ + const struct qmp_phy_cfg_tables tables; + /* + * Additional init sequence for PHY blocks, providing additional + * register programming. Unless required it can be left omitted. 
+ */ + const struct qmp_phy_cfg_tables *tables_rc; /* clock ids to be requested */ const char * const *clk_list; @@ -1459,14 +1458,16 @@ static const char * const sdm845_pciephy_reset_l[] = { static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .lanes = 1, - .serdes_tbl = ipq8074_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(ipq8074_pcie_serdes_tbl), - .tx_tbl = ipq8074_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(ipq8074_pcie_tx_tbl), - .rx_tbl = ipq8074_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(ipq8074_pcie_rx_tbl), - .pcs_tbl = ipq8074_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(ipq8074_pcie_pcs_tbl), + .tables = { + .serdes = ipq8074_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(ipq8074_pcie_serdes_tbl), + .tx = ipq8074_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(ipq8074_pcie_tx_tbl), + .rx = ipq8074_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(ipq8074_pcie_rx_tbl), + .pcs = ipq8074_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(ipq8074_pcie_pcs_tbl), + }, .clk_list = ipq8074_pciephy_clk_l, .num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l), .reset_list = ipq8074_pciephy_reset_l, @@ -1487,14 +1488,16 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .lanes = 1, - .serdes_tbl = ipq8074_pcie_gen3_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl), - .tx_tbl = ipq8074_pcie_gen3_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(ipq8074_pcie_gen3_tx_tbl), - .rx_tbl = ipq8074_pcie_gen3_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl), - .pcs_tbl = ipq8074_pcie_gen3_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl), + .tables = { + .serdes = ipq8074_pcie_gen3_serdes_tbl, + .serdes_num = ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl), + .tx = ipq8074_pcie_gen3_tx_tbl, + .tx_num = ARRAY_SIZE(ipq8074_pcie_gen3_tx_tbl), + .rx = ipq8074_pcie_gen3_rx_tbl, + .rx_num = ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl), + .pcs = ipq8074_pcie_gen3_pcs_tbl, + .pcs_num = ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl), + }, .clk_list = ipq8074_pciephy_clk_l, .num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l), .reset_list = ipq8074_pciephy_reset_l, @@ -1516,16 +1519,18 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .lanes = 1, - .serdes_tbl = ipq6018_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(ipq6018_pcie_serdes_tbl), - .tx_tbl = ipq6018_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(ipq6018_pcie_tx_tbl), - .rx_tbl = ipq6018_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(ipq6018_pcie_rx_tbl), - .pcs_tbl = ipq6018_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(ipq6018_pcie_pcs_tbl), - .pcs_misc_tbl = ipq6018_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(ipq6018_pcie_pcs_misc_tbl), + .tables = { + .serdes = ipq6018_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(ipq6018_pcie_serdes_tbl), + .tx = ipq6018_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(ipq6018_pcie_tx_tbl), + .rx = ipq6018_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(ipq6018_pcie_rx_tbl), + .pcs = ipq6018_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(ipq6018_pcie_pcs_tbl), + .pcs_misc = ipq6018_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(ipq6018_pcie_pcs_misc_tbl), + }, .clk_list = ipq8074_pciephy_clk_l, .num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l), .reset_list = ipq8074_pciephy_reset_l, @@ -1545,16 +1550,18 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sdm845_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl), - .tx_tbl = sdm845_qmp_pcie_tx_tbl, - 
.tx_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_tx_tbl), - .rx_tbl = sdm845_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_rx_tbl), - .pcs_tbl = sdm845_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_pcs_tbl), - .pcs_misc_tbl = sdm845_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sdm845_qmp_pcie_pcs_misc_tbl), + .tables = { + .serdes = sdm845_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl), + .tx = sdm845_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sdm845_qmp_pcie_tx_tbl), + .rx = sdm845_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sdm845_qmp_pcie_rx_tbl), + .pcs = sdm845_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sdm845_qmp_pcie_pcs_tbl), + .pcs_misc = sdm845_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sdm845_qmp_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1575,14 +1582,16 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sdm845_qhp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl), - .tx_tbl = sdm845_qhp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sdm845_qhp_pcie_tx_tbl), - .rx_tbl = sdm845_qhp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sdm845_qhp_pcie_rx_tbl), - .pcs_tbl = sdm845_qhp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sdm845_qhp_pcie_pcs_tbl), + .tables = { + .serdes = sdm845_qhp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl), + .tx = sdm845_qhp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sdm845_qhp_pcie_tx_tbl), + .rx = sdm845_qhp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sdm845_qhp_pcie_rx_tbl), + .pcs = sdm845_qhp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sdm845_qhp_pcie_pcs_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1603,24 +1612,28 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sm8250_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), - .serdes_tbl_sec = sm8250_qmp_gen3x1_pcie_serdes_tbl, - .serdes_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl), - .tx_tbl = sm8250_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl), - .rx_tbl = sm8250_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl), - .rx_tbl_sec = sm8250_qmp_gen3x1_pcie_rx_tbl, - .rx_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_rx_tbl), - .pcs_tbl = sm8250_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl), - .pcs_tbl_sec = sm8250_qmp_gen3x1_pcie_pcs_tbl, - .pcs_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_tbl), - .pcs_misc_tbl = sm8250_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), - .pcs_misc_tbl_sec = sm8250_qmp_gen3x1_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_misc_tbl), + .tables = { + .serdes = sm8250_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), + .tx = sm8250_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl), + .rx = sm8250_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl), + .pcs = sm8250_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl), + .pcs_misc = sm8250_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), + }, + .tables_rc = &(const struct 
qmp_phy_cfg_tables) { + .serdes = sm8250_qmp_gen3x1_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl), + .rx = sm8250_qmp_gen3x1_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_rx_tbl), + .pcs = sm8250_qmp_gen3x1_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_tbl), + .pcs_misc = sm8250_qmp_gen3x1_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1641,24 +1654,28 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .lanes = 2, - .serdes_tbl = sm8250_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), - .tx_tbl = sm8250_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl), - .tx_tbl_sec = sm8250_qmp_gen3x2_pcie_tx_tbl, - .tx_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl), - .rx_tbl = sm8250_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl), - .rx_tbl_sec = sm8250_qmp_gen3x2_pcie_rx_tbl, - .rx_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_rx_tbl), - .pcs_tbl = sm8250_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl), - .pcs_tbl_sec = sm8250_qmp_gen3x2_pcie_pcs_tbl, - .pcs_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_tbl), - .pcs_misc_tbl = sm8250_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), - .pcs_misc_tbl_sec = sm8250_qmp_gen3x2_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num_sec = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_misc_tbl), + .tables = { + .serdes = sm8250_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), + .tx = sm8250_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl), + .rx = sm8250_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl), + .pcs = sm8250_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl), + .pcs_misc = sm8250_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), + }, + .tables_rc = &(const struct qmp_phy_cfg_tables) { + .tx = sm8250_qmp_gen3x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl), + .rx = sm8250_qmp_gen3x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_rx_tbl), + .pcs = sm8250_qmp_gen3x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_tbl), + .pcs_misc = sm8250_qmp_gen3x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1679,14 +1696,16 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .lanes = 1, - .serdes_tbl = msm8998_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(msm8998_pcie_serdes_tbl), - .tx_tbl = msm8998_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(msm8998_pcie_tx_tbl), - .rx_tbl = msm8998_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(msm8998_pcie_rx_tbl), - .pcs_tbl = msm8998_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(msm8998_pcie_pcs_tbl), + .tables = { + .serdes = msm8998_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(msm8998_pcie_serdes_tbl), + .tx = msm8998_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(msm8998_pcie_tx_tbl), + .rx = msm8998_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(msm8998_pcie_rx_tbl), + .pcs = msm8998_pcie_pcs_tbl, + .pcs_num = 
ARRAY_SIZE(msm8998_pcie_pcs_tbl), + }, .clk_list = msm8996_phy_clk_l, .num_clks = ARRAY_SIZE(msm8996_phy_clk_l), .reset_list = ipq8074_pciephy_reset_l, @@ -1703,16 +1722,18 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = { static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sc8180x_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl), - .tx_tbl = sc8180x_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl), - .rx_tbl = sc8180x_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_rx_tbl), - .pcs_tbl = sc8180x_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_tbl), - .pcs_misc_tbl = sc8180x_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_misc_tbl), + .tables = { + .serdes = sc8180x_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl), + .tx = sc8180x_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl), + .rx = sc8180x_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sc8180x_qmp_pcie_rx_tbl), + .pcs = sc8180x_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_tbl), + .pcs_misc = sc8180x_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sc8180x_qmp_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1732,16 +1753,18 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .lanes = 2, - .serdes_tbl = sdx55_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl), - .tx_tbl = sdx55_qmp_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_tx_tbl), - .rx_tbl = sdx55_qmp_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_rx_tbl), - .pcs_tbl = sdx55_qmp_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_pcs_tbl), - .pcs_misc_tbl = sdx55_qmp_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sdx55_qmp_pcie_pcs_misc_tbl), + .tables = { + .serdes = sdx55_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl), + .tx = sdx55_qmp_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sdx55_qmp_pcie_tx_tbl), + .rx = sdx55_qmp_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sdx55_qmp_pcie_rx_tbl), + .pcs = sdx55_qmp_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sdx55_qmp_pcie_pcs_tbl), + .pcs_misc = sdx55_qmp_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sdx55_qmp_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1762,16 +1785,18 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .lanes = 1, - .serdes_tbl = sm8450_qmp_gen3x1_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl), - .tx_tbl = sm8450_qmp_gen3x1_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_tx_tbl), - .rx_tbl = sm8450_qmp_gen3x1_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_rx_tbl), - .pcs_tbl = sm8450_qmp_gen3x1_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_tbl), - .pcs_misc_tbl = sm8450_qmp_gen3x1_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_misc_tbl), + .tables = { + .serdes = sm8450_qmp_gen3x1_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl), + .tx = sm8450_qmp_gen3x1_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_tx_tbl), + .rx = sm8450_qmp_gen3x1_pcie_rx_tbl, + 
.rx_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_rx_tbl), + .pcs = sm8450_qmp_gen3x1_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_tbl), + .pcs_misc = sm8450_qmp_gen3x1_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1792,16 +1817,18 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .lanes = 2, - .serdes_tbl = sm8450_qmp_gen4x2_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl), - .tx_tbl = sm8450_qmp_gen4x2_pcie_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_tx_tbl), - .rx_tbl = sm8450_qmp_gen4x2_pcie_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rx_tbl), - .pcs_tbl = sm8450_qmp_gen4x2_pcie_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_tbl), - .pcs_misc_tbl = sm8450_qmp_gen4x2_pcie_pcs_misc_tbl, - .pcs_misc_tbl_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl), + .tables = { + .serdes = sm8450_qmp_gen4x2_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl), + .tx = sm8450_qmp_gen4x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_tx_tbl), + .rx = sm8450_qmp_gen4x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rx_tbl), + .pcs = sm8450_qmp_gen4x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_tbl), + .pcs_misc = sm8450_qmp_gen4x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl), + }, .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, @@ -1850,17 +1877,49 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, regs, tbl, num, 0xff); } -static int qmp_pcie_serdes_init(struct qmp_phy *qphy) +static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) { const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *serdes = qphy->serdes; - const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; - int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_pcie_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); - qmp_pcie_configure(serdes, cfg->regs, cfg->serdes_tbl_sec, cfg->serdes_tbl_num_sec); + if (!tables) + return; - return 0; + qmp_pcie_configure(serdes, cfg->regs, tables->serdes, tables->serdes_num); +} + +static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +{ + const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *tx = qphy->tx; + void __iomem *rx = qphy->rx; + + if (!tables) + return; + + qmp_pcie_configure_lane(tx, cfg->regs, tables->tx, tables->tx_num, 1); + + if (cfg->lanes >= 2) + qmp_pcie_configure_lane(qphy->tx2, cfg->regs, tables->tx, tables->tx_num, 2); + + qmp_pcie_configure_lane(rx, cfg->regs, tables->rx, tables->rx_num, 1); + if (cfg->lanes >= 2) + qmp_pcie_configure_lane(qphy->rx2, cfg->regs, tables->rx, tables->rx_num, 2); +} + +static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +{ + const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *pcs = qphy->pcs; + void __iomem *pcs_misc = qphy->pcs_misc; + + if (!tables) + return; + + qmp_pcie_configure(pcs, cfg->regs, + tables->pcs, tables->pcs_num); + qmp_pcie_configure(pcs_misc, cfg->regs, + tables->pcs_misc, tables->pcs_misc_num); } static int qmp_pcie_init(struct phy *phy) 
@@ -1932,15 +1991,13 @@ static int qmp_pcie_power_on(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; void __iomem *status; unsigned int mask, val, ready; int ret; - qmp_pcie_serdes_init(qphy); + qmp_pcie_serdes_init(qphy, &cfg->tables); + qmp_pcie_serdes_init(qphy, cfg->tables_rc); ret = clk_prepare_enable(qphy->pipe_clk); if (ret) { @@ -1949,31 +2006,11 @@ } /* Tx, Rx, and PCS configurations */ - qmp_pcie_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1); - qmp_pcie_configure_lane(tx, cfg->regs, cfg->tx_tbl_sec, cfg->tx_tbl_num_sec, 1); - - if (cfg->lanes >= 2) { - qmp_pcie_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl, - cfg->tx_tbl_num, 2); - qmp_pcie_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl_sec, - cfg->tx_tbl_num_sec, 2); - } - - qmp_pcie_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1); - qmp_pcie_configure_lane(rx, cfg->regs, cfg->rx_tbl_sec, cfg->rx_tbl_num_sec, 1); - - if (cfg->lanes >= 2) { - qmp_pcie_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl, - cfg->rx_tbl_num, 2); - qmp_pcie_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl_sec, - cfg->rx_tbl_num_sec, 2); - } - - qmp_pcie_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); - qmp_pcie_configure(pcs, cfg->regs, cfg->pcs_tbl_sec, cfg->pcs_tbl_num_sec); + qmp_pcie_lanes_init(qphy, &cfg->tables); + qmp_pcie_lanes_init(qphy, cfg->tables_rc); - qmp_pcie_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl, cfg->pcs_misc_tbl_num); - qmp_pcie_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl_sec, cfg->pcs_misc_tbl_num_sec); + qmp_pcie_pcs_init(qphy, &cfg->tables); + qmp_pcie_pcs_init(qphy, cfg->tables_rc); /* * Pull out PHY from POWER DOWN state. @@ -2240,7 +2277,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, qphy->pcs_misc = qphy->pcs + 0x400; if (IS_ERR(qphy->pcs_misc)) { - if (cfg->pcs_misc_tbl || cfg->pcs_misc_tbl_sec) + if (cfg->tables.pcs_misc || + (cfg->tables_rc && cfg->tables_rc->pcs_misc)) return PTR_ERR(qphy->pcs_misc); } -- GitLab From 11bf53a38c82baef349b4efc6a84f069dab7085a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:03 +0300 Subject: [PATCH 011/694] phy: qcom-qmp-pcie: support separate tables for EP mode The PCIe QMP PHY requires different programming sequences when being used for the RC (Root Complex) or for the EP (End Point) modes. Allow selecting the submode and thus selecting a set of PHY programming tables. Since the RC and EP modes share some common parts of the init sequence, the common sequence is kept in the main table and the sequence differences are pushed to the extra tables. 
Reviewed-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220927092207.161501-3-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 46 ++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index ae0d7b49dfa36..ba01338d93acd 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1320,10 +1321,14 @@ struct qmp_phy_cfg { /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_cfg_tables tables; /* - * Additional init sequence for PHY blocks, providing additional - * register programming. Unless required it can be left omitted. + * Additional init sequences for PHY blocks, providing additional + * register programming. They are used for providing separate sequences + * for the Root Complex and End Point use cases. + * + * If EP mode is not supported, both tables can be left unset. */ const struct qmp_phy_cfg_tables *tables_rc; + const struct qmp_phy_cfg_tables *tables_ep; /* clock ids to be requested */ const char * const *clk_list; @@ -1367,6 +1372,7 @@ struct qmp_phy_cfg { * @pcs_misc: iomapped memory space for lane's pcs_misc * @pipe_clk: pipe clock * @qmp: QMP phy to which this lane belongs + * @mode: currently selected PHY mode */ struct qmp_phy { struct phy *phy; @@ -1380,6 +1386,7 @@ struct qmp_phy { void __iomem *pcs_misc; struct clk *pipe_clk; struct qcom_qmp *qmp; + int mode; }; /** @@ -1991,13 +1998,19 @@ static int qmp_pcie_power_on(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; + const struct qmp_phy_cfg_tables *mode_tables; void __iomem *pcs = qphy->pcs; void __iomem *status; unsigned int mask, val, ready; int ret; + if (qphy->mode == PHY_MODE_PCIE_RC) + mode_tables = cfg->tables_rc; + else + mode_tables = cfg->tables_ep; + qmp_pcie_serdes_init(qphy, &cfg->tables); - qmp_pcie_serdes_init(qphy, cfg->tables_rc); + qmp_pcie_serdes_init(qphy, mode_tables); ret = clk_prepare_enable(qphy->pipe_clk); if (ret) { @@ -2007,10 +2020,10 @@ static int qmp_pcie_power_on(struct phy *phy) /* Tx, Rx, and PCS configurations */ qmp_pcie_lanes_init(qphy, &cfg->tables); - qmp_pcie_lanes_init(qphy, cfg->tables_rc); + qmp_pcie_lanes_init(qphy, mode_tables); qmp_pcie_pcs_init(qphy, &cfg->tables); - qmp_pcie_pcs_init(qphy, cfg->tables_rc); + qmp_pcie_pcs_init(qphy, mode_tables); /* * Pull out PHY from POWER DOWN state. 
@@ -2097,6 +2110,23 @@ static int qmp_pcie_disable(struct phy *phy) return qmp_pcie_exit(phy); } +static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) +{ + struct qmp_phy *qphy = phy_get_drvdata(phy); + + switch (submode) { + case PHY_MODE_PCIE_RC: + case PHY_MODE_PCIE_EP: + qphy->mode = submode; + break; + default: + dev_err(&phy->dev, "Unsupported submode %d\n", submode); + return -EINVAL; + } + + return 0; +} + static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { struct qcom_qmp *qmp = dev_get_drvdata(dev); @@ -2220,6 +2250,7 @@ static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) static const struct phy_ops qmp_pcie_ops = { .power_on = qmp_pcie_enable, .power_off = qmp_pcie_disable, + .set_mode = qmp_pcie_set_mode, .owner = THIS_MODULE, }; @@ -2235,6 +2266,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, if (!qphy) return -ENOMEM; + qphy->mode = PHY_MODE_PCIE_RC; + qphy->cfg = cfg; qphy->serdes = serdes; /* @@ -2278,7 +2311,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, if (IS_ERR(qphy->pcs_misc)) { if (cfg->tables.pcs_misc || - (cfg->tables_rc && cfg->tables_rc->pcs_misc)) + (cfg->tables_rc && cfg->tables_rc->pcs_misc) || + (cfg->tables_ep && cfg->tables_ep->pcs_misc)) return PTR_ERR(qphy->pcs_misc); } -- GitLab From f5682f13b7ab0bbdffd11934afe4b5c011d5be74 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:04 +0300 Subject: [PATCH 012/694] phy: qcom-qmp-pcie: Support SM8450 PCIe1 PHY in EP mode Add support for using PCIe1 (gen4x2) in EP mode on SM8450. The tables to program are mostly common with the RC mode tables, so only register difference are split into separate RC and EP tables. 
Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220927092207.161501-4-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 78 +++++++++++++++---- .../qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h | 1 + 2 files changed, 64 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index ba01338d93acd..f3f75eda01a6d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1185,15 +1185,29 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen3x1_pcie_pcs_misc_tbl[] = { }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x46), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_CFG, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MISC1, 0x88), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_CONFIG, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MODE, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_DC_LEVEL_CTRL, 0x0f), +}; + +static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER2, 0x01), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0, 0xde), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0, 0x07), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1, 0x97), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1, 0x0c), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_ENABLE1, 0x90), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x06), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x06), QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16), @@ -1201,8 +1215,6 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36), QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x36), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x08), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x46), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_CFG, 0x04), QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0a), QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1a), QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x14), @@ -1215,17 +1227,8 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE1, 0x55), QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE1, 0x55), QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE1, 0x05), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_SELECT, 0x34), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x12), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x0a), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x04), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MISC1, 0x88), QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORE_CLK_EN, 0x20), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_CONFIG, 0x06), - 
QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MODE, 0x14), - QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_DC_LEVEL_CTRL, 0x0f), }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_tx_tbl[] = { @@ -1293,14 +1296,44 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_tbl[] = { }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = { - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), - QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5, 0x02), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_EQ_CONFIG1, 0x16), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3, 0x28), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e), }; +static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), +}; + +static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BG_TIMER, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYS_CLK_CTRL, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x27), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x17), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x19), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x09), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x19), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x28), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN0_MODE0, 0xfb), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN1_MODE0, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN0_MODE1, 0xfb), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN1_MODE1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORE_CLK_EN, 0x60), +}; + +static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08), +}; + struct qmp_phy_cfg_tables { const struct qmp_phy_init_tbl *serdes; int serdes_num; @@ -1836,6 +1869,21 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .pcs_misc = sm8450_qmp_gen4x2_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl), }, + + .tables_rc = &(const struct qmp_phy_cfg_tables) { + .serdes = sm8450_qmp_gen4x2_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_serdes_tbl), + .pcs_misc = sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl), + }, + + .tables_ep = &(const struct qmp_phy_cfg_tables) { + .serdes = sm8450_qmp_gen4x2_pcie_ep_serdes_tbl, + .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_serdes_tbl), + .pcs_misc = sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl), + }, + .clk_list = sdm845_pciephy_clk_l, .num_clks = ARRAY_SIZE(sdm845_pciephy_clk_l), .reset_list = sdm845_pciephy_reset_l, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h index 1eedf50cf9cbc..c9fa90b454756 100644 --- 
a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h @@ -8,6 +8,7 @@ /* Only for QMP V5_20 PHY - PCIe PCS registers */ #define QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE 0x01c +#define QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5 0x084 #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS 0x090 #define QPHY_V5_20_PCS_PCIE_EQ_CONFIG1 0x0a0 #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5 0x108 -- GitLab From f90747d1b641aad244cca7d6aa20aa25f33ae8e4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:05 +0300 Subject: [PATCH 013/694] PCI: qcom: Setup PHY to work in RC mode Call phy_set_mode_ext() to notify the PHY driver that the PHY is being used in the RC mode. Reviewed-by: Jingoo Han Reviewed-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20220927092207.161501-5-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/pci/controller/dwc/pcie-qcom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index f711acacaeaf8..7db94a22238d1 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1497,6 +1498,10 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) if (ret) return ret; + ret = phy_set_mode_ext(pcie->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_RC); + if (ret) + goto err_deinit; + ret = phy_power_on(pcie->phy); if (ret) goto err_deinit; -- GitLab From a84ed1919fb3fc767ae3aad13bbff8ea8eaceedd Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 27 Sep 2022 12:22:06 +0300 Subject: [PATCH 014/694] PCI: qcom-ep: Setup PHY to work in EP mode Call phy_set_mode_ext() to notify the PHY driver that the PHY is being used in the EP mode. Reviewed-by: Manivannan Sadhasivam Reviewed-by: Jingoo Han Reviewed-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20220927092207.161501-6-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/pci/controller/dwc/pcie-qcom-ep.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index 6d0d1b759ca24..19b32839ea261 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -268,6 +269,10 @@ static int qcom_pcie_enable_resources(struct qcom_pcie_ep *pcie_ep) if (ret) goto err_disable_clk; + ret = phy_set_mode_ext(pcie_ep->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_EP); + if (ret) + goto err_phy_exit; + ret = phy_power_on(pcie_ep->phy); if (ret) goto err_phy_exit; -- GitLab From 8d3bf72497a8def5dc75e10a2229f1c692598b97 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:03 +0200 Subject: [PATCH 015/694] phy: qcom-qmp: fix obsolete lane comments All QMP drivers but the MSM8996 and combo ones handle exactly one PHY and the corresponding memory resources are not per-lane, but per PHY. Update the obsolete comments. 
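To illustrate the mapping the updated comments describe, a minimal sketch of the per-PHY resource lookup (following the indexing convention spelled out in the comments, error handling omitted) looks roughly like this:

	/* Resources are indexed per PHY, not per lane:
	 * tx -> 0, rx -> 1, pcs -> 2, pcs_misc (optional) -> 3 for
	 * single-lane PHYs.
	 */
	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
	qphy->rx = devm_of_iomap(dev, np, 1, NULL);
	qphy->pcs = devm_of_iomap(dev, np, 2, NULL);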
Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9807c4d935cdb..8a2a35c0855b4 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2669,7 +2669,7 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for each PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 461f0b5d464a8..707ec81c7a2ae 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -777,7 +777,7 @@ static int qmp_pcie_msm8996_create(struct device *dev, struct device_node *np, i qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for each PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. */ qphy->tx = devm_of_iomap(dev, np, 0, NULL); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index f3f75eda01a6d..e0408c423ac6d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2319,7 +2319,7 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index c08d34ad1313e..db5642e1f7154 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1094,7 +1094,7 @@ static int qmp_ufs_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index b84c0d4b57541..965e486ab87df 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2581,7 +2581,7 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each phy lane: + * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. 
* For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. -- GitLab From f823346de8b1fa44bbab3ef62d40e9616332c7ee Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:04 +0200 Subject: [PATCH 016/694] phy: qcom-qmp-combo: drop unused UFS reset Drop the unused UFS reset code which isn't used since the QMP driver split. Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 8a2a35c0855b4..c21512b9ab528 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -944,7 +944,6 @@ struct qmp_phy_dp_clks { * @phys: array of per-lane phy descriptors * @phy_mutex: mutex lock for PHY common block initialization * @init_count: phy common block initialization count - * @ufs_reset: optional UFS PHY reset handle */ struct qcom_qmp { struct device *dev; @@ -958,8 +957,6 @@ struct qcom_qmp { struct mutex phy_mutex; int init_count; - - struct reset_control *ufs_reset; }; static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy); @@ -2027,8 +2024,6 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy) return 0; } - reset_control_assert(qmp->ufs_reset); - reset_control_bulk_assert(cfg->num_resets, qmp->resets); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); @@ -2103,10 +2098,6 @@ static int qmp_combo_power_on(struct phy *phy) else qmp_combo_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); - ret = reset_control_deassert(qmp->ufs_reset); - if (ret) - goto err_disable_pipe_clk; - if (cfg->has_pwrdn_delay) usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); -- GitLab From 4567bb1799d253ceb81ba9c9837ae13a86e4b50a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:05 +0200 Subject: [PATCH 017/694] phy: qcom-qmp-pcie: drop unused common-block registers Drop the common-block register defines that are unused since the QMP driver split. 
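The dropped defines were entries in the driver's per-revision register layout tables; nothing indexes them since the split, so they can simply go. As a rough sketch of the pattern (abbreviated, not the full table), register accesses go through the layout table rather than hard-coded offsets:

	/* per-revision PCS offsets, indexed by enum qphy_reg_layout */
	static const unsigned int pciephy_regs_layout[QPHY_LAYOUT_SIZE] = {
		[QPHY_SW_RESET]		= 0x00,
		[QPHY_START_CTRL]	= 0x08,
		[QPHY_PCS_STATUS]	= 0x174,
	};

	/* callers look the offset up instead of hard-coding it */
	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);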
Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index e0408c423ac6d..bdbd18b89a8cd 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -78,11 +78,6 @@ struct qmp_phy_init_tbl { /* set of registers with offsets different per-PHY */ enum qphy_reg_layout { - /* Common block control registers */ - QPHY_COM_SW_RESET, - QPHY_COM_POWER_DOWN_CONTROL, - QPHY_COM_START_CONTROL, - QPHY_COM_PCS_READY_STATUS, /* PCS registers */ QPHY_SW_RESET, QPHY_START_CTRL, @@ -100,10 +95,6 @@ static const unsigned int ipq_pciephy_gen3_regs_layout[QPHY_LAYOUT_SIZE] = { }; static const unsigned int pciephy_regs_layout[QPHY_LAYOUT_SIZE] = { - [QPHY_COM_SW_RESET] = 0x400, - [QPHY_COM_POWER_DOWN_CONTROL] = 0x404, - [QPHY_COM_START_CONTROL] = 0x408, - [QPHY_COM_PCS_READY_STATUS] = 0x448, [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x174, -- GitLab From 6d5b1e2067aef151747b4ec1cd927d44b61e4293 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:06 +0200 Subject: [PATCH 018/694] phy: qcom-qmp-pcie: clean up power-down handling Always define the POWER_DOWN_CONTROL register instead of falling back to the v2 offset during power on and power off. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index bdbd18b89a8cd..1105d439828c7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -98,18 +98,21 @@ static const unsigned int pciephy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x174, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sdm845_qmp_pciephy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x174, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sdm845_qhp_pciephy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x2ac, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sm8250_pcie_regs_layout[QPHY_LAYOUT_SIZE] = { @@ -1999,13 +2002,8 @@ static int qmp_pcie_init(struct phy *phy) if (ret) goto err_assert_reset; - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; @@ -2112,13 +2110,8 @@ static int qmp_pcie_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - 
cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; } -- GitLab From 5b68d95c3fc72b4a89b9c7549e1ef638a01a3e15 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:07 +0200 Subject: [PATCH 019/694] phy: qcom-qmp-pcie: move power-down update Move the power-down-control register update that powers on the PHY to the power-on handler so that it matches the power-off handler. Note that the power-on handler is currently always called directly after init. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 1105d439828c7..b42c5e185228c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1976,7 +1976,6 @@ static int qmp_pcie_init(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; int ret; /* turn on regulator supplies */ @@ -2002,9 +2001,6 @@ static int qmp_pcie_init(struct phy *phy) if (ret) goto err_assert_reset; - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - return 0; err_assert_reset: @@ -2041,6 +2037,9 @@ static int qmp_pcie_power_on(struct phy *phy) unsigned int mask, val, ready; int ret; + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); + if (qphy->mode == PHY_MODE_PCIE_RC) mode_tables = cfg->tables_rc; else -- GitLab From 4d3701f94f274ac67cddd1e87a2311a2a40c0138 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:08 +0200 Subject: [PATCH 020/694] phy: qcom-qmp-pcie-msm8996: clean up power-down handling This driver uses v2 registers only so drop the unnecessary POWER_DOWN_CONTROL override. Note that this register is already hard-coded when powering on the PHY. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 707ec81c7a2ae..5fdd85a1dc3ef 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -91,7 +91,6 @@ enum qphy_reg_layout { QPHY_SW_RESET, QPHY_START_CTRL, QPHY_PCS_STATUS, - QPHY_PCS_POWER_DOWN_CONTROL, /* Keep last to ensure regs_layout arrays are properly initialized */ QPHY_LAYOUT_SIZE }; @@ -591,13 +590,8 @@ static int qmp_pcie_msm8996_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, + cfg->pwrdn_ctrl); return 0; } -- GitLab From 2e52ddf045a08fcae8dc4c88d10aa01252dd4165 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:09 +0200 Subject: [PATCH 021/694] phy: qcom-qmp-combo: clean up power-down handling Always define the POWER_DOWN_CONTROL register instead of falling back to the v2 (and v3) offset during power on and power off. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index c21512b9ab528..7b434e2ee640e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -121,6 +121,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_SW_RESET] = 0x00, [QPHY_START_CTRL] = 0x08, [QPHY_PCS_STATUS] = 0x174, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = 0x0d8, [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = 0x0dc, [QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x170, @@ -1991,13 +1992,8 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); mutex_unlock(&qmp->phy_mutex); @@ -2144,13 +2140,8 @@ static int qmp_combo_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); } return 0; -- GitLab From 2d3068cf8d9aa80cfbe2dd4226abbf425c26f8b7 Mon Sep 17 
00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:10 +0200 Subject: [PATCH 022/694] phy: qcom-qmp-ufs: clean up power-down handling Always define the POWER_DOWN_CONTROL register instead of falling back to the v2 (and v4) offset during power on and power off. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index db5642e1f7154..e28c45ab74ea2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -89,22 +89,26 @@ enum qphy_reg_layout { static const unsigned int msm8996_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = 0x00, [QPHY_PCS_READY_STATUS] = 0x168, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sdm845_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = 0x00, [QPHY_PCS_READY_STATUS] = 0x160, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sm6115_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = 0x00, [QPHY_PCS_READY_STATUS] = 0x168, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int sm8150_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = QPHY_V4_PCS_UFS_PHY_START, [QPHY_PCS_READY_STATUS] = QPHY_V4_PCS_UFS_READY_STATUS, [QPHY_SW_RESET] = QPHY_V4_PCS_UFS_SW_RESET, + [QPHY_PCS_POWER_DOWN_CONTROL] = QPHY_V4_PCS_UFS_POWER_DOWN_CONTROL, }; static const struct qmp_phy_init_tbl msm8996_ufs_serdes_tbl[] = { @@ -856,13 +860,8 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy) if (ret) goto err_disable_regulators; - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; @@ -996,13 +995,8 @@ static int qmp_ufs_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; } -- GitLab From 645d3d04702401e002928b934b830bd25be9e277 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:11 +0200 Subject: [PATCH 023/694] phy: qcom-qmp-usb: clean up power-down handling Always define the POWER_DOWN_CONTROL register instead of falling back to the v2 (and v3) offset during power on and power off. 
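This is the same conversion as in the PCIe, combo and UFS PHY drivers: once every regs_layout table carries a POWER_DOWN_CONTROL entry, the conditional fallback collapses into a plain table lookup. A condensed sketch, with the v3 layout abbreviated to the entries relevant here:

	static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
		[QPHY_SW_RESET]			= 0x00,
		[QPHY_START_CTRL]		= 0x08,
		[QPHY_PCS_STATUS]		= 0x174,
		[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,	/* now always set */
	};

	/* init and power-off then use the lookup unconditionally: */
	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], cfg->pwrdn_ctrl);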
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 965e486ab87df..b0b13fb6cb59e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -126,6 +126,7 @@ static const unsigned int usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = 0x0d4, [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = 0x0d8, [QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x178, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { @@ -135,6 +136,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = 0x0d8, [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = 0x0dc, [QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x170, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, }; static const unsigned int qmp_v4_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { @@ -2164,13 +2166,8 @@ static int qmp_usb_init(struct phy *phy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); } - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; @@ -2277,13 +2274,8 @@ static int qmp_usb_power_off(struct phy *phy) qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); /* Put PHY into POWER DOWN state: active low */ - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) { - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); - } else { - qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); - } + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + cfg->pwrdn_ctrl); return 0; } -- GitLab From 5b76f5ec63e0bfd20d955fc9d09dc2cff7742bec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:12 +0200 Subject: [PATCH 024/694] phy: qcom-qmp-pcie: clean up clock lists Keep the clock lists together and sorted by symbol name. 
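For context, these name lists are only consumed at probe time, where each entry becomes a clk_bulk_data request; something along these lines (a sketch of the idea, not the driver's exact helper):

	static const char * const ipq8074_pciephy_clk_l[] = {
		"aux", "cfg_ahb",
	};

	/* probe builds a clk_bulk_data array from cfg->clk_list, roughly: */
	for (i = 0; i < cfg->num_clks; i++)
		qmp->clks[i].id = cfg->clk_list[i];
	ret = devm_clk_bulk_get(dev, cfg->num_clks, qmp->clks);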
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index b42c5e185228c..cb2128e5a78d4 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1462,6 +1462,10 @@ static inline void qphy_clrbits(void __iomem *base, u32 offset, u32 val) } /* list of clocks required by phy */ +static const char * const ipq8074_pciephy_clk_l[] = { + "aux", "cfg_ahb", +}; + static const char * const msm8996_phy_clk_l[] = { "aux", "cfg_ahb", "ref", }; @@ -1476,10 +1480,6 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; -static const char * const ipq8074_pciephy_clk_l[] = { - "aux", "cfg_ahb", -}; - /* list of resets */ static const char * const ipq8074_pciephy_reset_l[] = { "phy", "common", -- GitLab From 2d93887cb4bac0a36ce9e146956f631ab7994680 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 17 Oct 2022 08:50:13 +0200 Subject: [PATCH 025/694] phy: qcom-qmp-pcie: drop bogus register update Since commit 0d58280cf1e6 ("phy: Update PHY power control sequence") the PHY is powered on before configuring the registers and only the MSM8996 PCIe PHY, which includes the POWER_DOWN_CONTROL register in its PCS initialisation table, may possibly require a second update afterwards. To make things worse, the POWER_DOWN_CONTROL register lies at a different offset on more recent SoCs so that the second update, which still used a hard-coded offset, would write to an unrelated register (e.g. a revision-id register on SC8280XP). As the MSM8996 PCIe PHY is now handled by a separate driver, simply drop the bogus register update. Fixes: e4d8b05ad5f9 ("phy: qcom-qmp: Use proper PWRDOWN offset for sm8150 USB") added support Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov #RB3 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221017065013.19647-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index cb2128e5a78d4..30838ae8f0272 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2061,12 +2061,6 @@ static int qmp_pcie_power_on(struct phy *phy) qmp_pcie_pcs_init(qphy, &cfg->tables); qmp_pcie_pcs_init(qphy, mode_tables); - /* - * Pull out PHY from POWER DOWN state. - * This is active low enable signal to power-down PHY. - */ - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl); - if (cfg->has_pwrdn_delay) usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); -- GitLab From f74495761df10c25a98256d16ea7465191b6e2cd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 18 Oct 2022 09:25:00 +0800 Subject: [PATCH 026/694] soundwire: dmi-quirks: add quirk variant for LAPBC710 NUC15 Some NUC15 LAPBC710 devices don't expose the same DMI information as the Intel reference, add additional entry in the match table. 
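The quirk table is matched against the running system's DMI data and the winning entry's driver_data supplies the replacement address map; a hedged sketch of such a lookup (apply_remap() is a placeholder name, not the real consumer in dmi-quirks.c):

	const struct dmi_system_id *dmi_id;

	dmi_id = dmi_first_match(adr_remap_quirk_table);
	if (dmi_id)
		apply_remap(dmi_id->driver_data);	/* placeholder consumer */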
BugLink: https://github.com/thesofproject/linux/issues/3885 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221018012500.1592994-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c index f81cdd83ec26e..7969881f126dc 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -90,6 +90,14 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { }, .driver_data = (void *)intel_tgl_bios, }, + { + /* quirk used for NUC15 LAPBC710 skew */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "LAPBC710"), + }, + .driver_data = (void *)intel_tgl_bios, + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), -- GitLab From 3c6bd6fa83bb6c7a891891a8a32aea2820aadb06 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 17 Oct 2022 20:57:37 -0400 Subject: [PATCH 027/694] dt-bindings: dma: qcom: gpi: add fallback compatible The drivers are transitioning from matching against lists of specific compatible strings to matching against smaller lists of more generic compatible strings. Use the SDM845 compatible string as a fallback in the schema to support this change. Signed-off-by: Richard Acayan Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221018005740.23952-2-mailingradian@gmail.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/qcom,gpi.yaml | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index eabf8a76d3a05..182b8573230d3 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -18,14 +18,18 @@ allOf: properties: compatible: - enum: - - qcom,sc7280-gpi-dma - - qcom,sdm845-gpi-dma - - qcom,sm6350-gpi-dma - - qcom,sm8150-gpi-dma - - qcom,sm8250-gpi-dma - - qcom,sm8350-gpi-dma - - qcom,sm8450-gpi-dma + oneOf: + - enum: + - qcom,sc7280-gpi-dma + - qcom,sdm845-gpi-dma + - qcom,sm6350-gpi-dma + - qcom,sm8350-gpi-dma + - qcom,sm8450-gpi-dma + - items: + - enum: + - qcom,sm8150-gpi-dma + - qcom,sm8250-gpi-dma + - const: qcom,sdm845-gpi-dma reg: maxItems: 1 -- GitLab From 67fd570d734d35ef6b5e8ee5a3195a2aa843c2d8 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 17 Oct 2022 20:57:38 -0400 Subject: [PATCH 028/694] dt-bindings: dma: qcom: gpi: add compatible for sdm670 The Snapdragon 670 uses GPI DMA for its GENI interface. Add a compatible string for it in the documentation. 
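Because the binding pairs the SoC-specific string with the sdm845 fallback, no driver change is needed: the OF core tries each compatible value of the node in order, so a board that lists both strings still binds through the existing sdm845 entry (ee offset 0). Driver-side this corresponds to the unchanged match entry, sketched here:

	/* DT: compatible = "qcom,sdm670-gpi-dma", "qcom,sdm845-gpi-dma"; */
	static const struct of_device_id gpi_of_match[] = {
		{ .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 },
		{ },
	};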
Signed-off-by: Richard Acayan Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221018005740.23952-3-mailingradian@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 182b8573230d3..6f7dcae944e4d 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -27,6 +27,7 @@ properties: - qcom,sm8450-gpi-dma - items: - enum: + - qcom,sdm670-gpi-dma - qcom,sm8150-gpi-dma - qcom,sm8250-gpi-dma - const: qcom,sdm845-gpi-dma -- GitLab From 8527721ee6bd596a211fa2a1bbaf939e994cb89c Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 17 Oct 2022 20:57:39 -0400 Subject: [PATCH 029/694] dmaengine: qcom: deprecate redundant of_device_id entries The drivers are transitioning from matching against lists of specific compatible strings to matching against smaller lists of more generic compatible strings. Add a message that the compatible strings with an ee_offset of 0 are deprecated except for the SDM845 compatible string. Signed-off-by: Richard Acayan Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221018005740.23952-4-mailingradian@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 3f56514bbef8f..f8e19e6e6117d 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -2289,6 +2289,10 @@ static const struct of_device_id gpi_of_match[] = { { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 }, { .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 }, + /* + * Deprecated, devices with ee_offset = 0 should use sdm845-gpi-dma as + * fallback and not need their own entries here. + */ { .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 }, -- GitLab From dfad1e14b27734b204ea821977d43b16d1d1919f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 18 Oct 2022 19:03:48 -0400 Subject: [PATCH 030/694] dt-bindings: dma: qcom: gpi: Use sm6350 fallback Several devices like SM6350, SM8150 and SC7280 are actually compatible, so use one compatible fallback for all of them. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221018230352.1238479-2-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 6f7dcae944e4d..0c28944988456 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -20,11 +20,14 @@ properties: compatible: oneOf: - enum: - - qcom,sc7280-gpi-dma - qcom,sdm845-gpi-dma - qcom,sm6350-gpi-dma - - qcom,sm8350-gpi-dma - - qcom,sm8450-gpi-dma + - items: + - enum: + - qcom,sc7280-gpi-dma + - qcom,sm8350-gpi-dma + - qcom,sm8450-gpi-dma + - const: qcom,sm6350-gpi-dma - items: - enum: - qcom,sdm670-gpi-dma -- GitLab From 88bc8ac63db045e74b2ea7015c51384c05b09ae5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 18 Oct 2022 19:03:49 -0400 Subject: [PATCH 031/694] dmaengine: qcom: gpi: Document preferred SM6350 binding Devices with ee offset of 0x10000 should rather bind with SM6350 compatible, so the list will not unnecessarily grow for compatible devices. Signed-off-by: Krzysztof Kozlowski Acked-by: Richard Acayan Link: https://lore.kernel.org/r/20221018230352.1238479-3-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index f8e19e6e6117d..061add8322951 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -2286,13 +2286,14 @@ static int gpi_probe(struct platform_device *pdev) } static const struct of_device_id gpi_of_match[] = { - { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 }, { .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 }, /* - * Deprecated, devices with ee_offset = 0 should use sdm845-gpi-dma as - * fallback and not need their own entries here. + * Do not grow the list for compatible devices. Instead use + * qcom,sdm845-gpi-dma (for ee_offset = 0x0) or qcom,sm6350-gpi-dma + * (for ee_offset = 0x10000). */ + { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 }, { .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 }, { .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 }, -- GitLab From 4967a7803c341361a8bf67ace206bca8b390dc22 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Oct 2022 17:09:13 +0100 Subject: [PATCH 032/694] dmaengine: ioat: Fix spelling mistake "idel" -> "idle" There is a spelling mistake in the module description. Fix it. 
Signed-off-by: Colin Ian King Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20221004160913.154739-1-colin.i.king@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index e2070df6cad28..79d244011093c 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -33,7 +33,7 @@ MODULE_PARM_DESC(completion_timeout, static int idle_timeout = 2000; module_param(idle_timeout, int, 0644); MODULE_PARM_DESC(idle_timeout, - "set ioat idel timeout [msec] (default 2000 [msec])"); + "set ioat idle timeout [msec] (default 2000 [msec])"); #define IDLE_TIMEOUT msecs_to_jiffies(idle_timeout) #define COMPLETION_TIMEOUT msecs_to_jiffies(completion_timeout) -- GitLab From beb6f6493853d862490f0d5b99910caa358dd3d4 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 29 Sep 2022 16:48:18 -0700 Subject: [PATCH 033/694] of/irq: export of_msi_get_domain Export of_mis_get_domain to enable it for users from outside. Signed-off-by: Matthias Brugger Acked-by: Rob Herring Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200122104723.16955-1-peter.ujfalusi@ti.com Signed-off-by: Kevin Hilman Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220929234820.940048-2-khilman@baylibre.com Signed-off-by: Vinod Koul --- drivers/of/irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 2bac44f09554b..e9bf5236ed892 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -730,6 +730,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev, return NULL; } +EXPORT_SYMBOL_GPL(of_msi_get_domain); /** * of_msi_configure - Set the msi_domain field of a device -- GitLab From 56b0a668cb35c5f04ef98ffc22b297f116fe7108 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 29 Sep 2022 16:48:19 -0700 Subject: [PATCH 034/694] dmaengine: ti: convert k3-udma to module Currently k3-udma driver is built as separate platform drivers with a shared probe and identical code path, just differnet platform data. To enable to build as module, convert the separate platform driver into a single module_platform_driver with the data selection done via compatible string and of_match. The separate of_match tables are also combined into a single table to avoid the multiple calls to of_match_node() Since all modern TI platforms using this are DT enabled, the removal of separate platform_drivers should have no functional change. Acked-by: Peter Ujfalusi Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/20220929234820.940048-3-khilman@baylibre.com Signed-off-by: Vinod Koul --- drivers/dma/ti/Kconfig | 4 ++-- drivers/dma/ti/k3-udma-glue.c | 5 ++++- drivers/dma/ti/k3-udma.c | 40 +++++------------------------------ 3 files changed, 11 insertions(+), 38 deletions(-) diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig index 79618fac119a7..f196be3b222f9 100644 --- a/drivers/dma/ti/Kconfig +++ b/drivers/dma/ti/Kconfig @@ -35,7 +35,7 @@ config DMA_OMAP DMA engine is found on OMAP and DRA7xx parts. config TI_K3_UDMA - bool "Texas Instruments UDMA support" + tristate "Texas Instruments UDMA support" depends on ARCH_K3 depends on TI_SCI_PROTOCOL depends on TI_SCI_INTA_IRQCHIP @@ -48,7 +48,7 @@ config TI_K3_UDMA DMA engine is used in AM65x and j721e. 
config TI_K3_UDMA_GLUE_LAYER - bool "Texas Instruments UDMA Glue layer for non DMAengine users" + tristate "Texas Instruments UDMA Glue layer for non DMAengine users" depends on ARCH_K3 depends on TI_K3_UDMA help diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c index 4fdd9f06b7235..c29de4695ae7a 100644 --- a/drivers/dma/ti/k3-udma-glue.c +++ b/drivers/dma/ti/k3-udma-glue.c @@ -6,6 +6,7 @@ * */ +#include #include #include #include @@ -1433,4 +1434,6 @@ static int __init k3_udma_glue_class_init(void) { return class_register(&k3_udma_glue_devclass); } -arch_initcall(k3_udma_glue_class_init); + +module_init(k3_udma_glue_class_init); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 7b5081989b3d6..ce8b80bb34d7d 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -4335,18 +4336,10 @@ static const struct of_device_id udma_of_match[] = { .compatible = "ti,j721e-navss-mcu-udmap", .data = &j721e_mcu_data, }, - { /* Sentinel */ }, -}; - -static const struct of_device_id bcdma_of_match[] = { { .compatible = "ti,am64-dmss-bcdma", .data = &am64_bcdma_data, }, - { /* Sentinel */ }, -}; - -static const struct of_device_id pktdma_of_match[] = { { .compatible = "ti,am64-dmss-pktdma", .data = &am64_pktdma_data, @@ -5271,14 +5264,9 @@ static int udma_probe(struct platform_device *pdev) return -ENOMEM; match = of_match_node(udma_of_match, dev->of_node); - if (!match) - match = of_match_node(bcdma_of_match, dev->of_node); if (!match) { - match = of_match_node(pktdma_of_match, dev->of_node); - if (!match) { - dev_err(dev, "No compatible match found\n"); - return -ENODEV; - } + dev_err(dev, "No compatible match found\n"); + return -ENODEV; } ud->match_data = match->data; @@ -5511,27 +5499,9 @@ static struct platform_driver udma_driver = { }, .probe = udma_probe, }; -builtin_platform_driver(udma_driver); -static struct platform_driver bcdma_driver = { - .driver = { - .name = "ti-bcdma", - .of_match_table = bcdma_of_match, - .suppress_bind_attrs = true, - }, - .probe = udma_probe, -}; -builtin_platform_driver(bcdma_driver); - -static struct platform_driver pktdma_driver = { - .driver = { - .name = "ti-pktdma", - .of_match_table = pktdma_of_match, - .suppress_bind_attrs = true, - }, - .probe = udma_probe, -}; -builtin_platform_driver(pktdma_driver); +module_platform_driver(udma_driver); +MODULE_LICENSE("GPL v2"); /* Private interfaces to UDMA */ #include "k3-udma-private.c" -- GitLab From d15aae73a9f6c321167b9120f263df7dbc08d2ba Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 29 Sep 2022 16:48:20 -0700 Subject: [PATCH 035/694] dmaengine: ti: convert PSIL to be buildable as module Combine all the SoC specific files into a single lib that can be built-in or built as a module. Acked-by: Peter Ujfalusi Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/20220929234820.940048-4-khilman@baylibre.com Signed-off-by: Vinod Koul --- drivers/dma/ti/Kconfig | 3 ++- drivers/dma/ti/Makefile | 15 ++++++++------- drivers/dma/ti/k3-psil.c | 2 ++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig index f196be3b222f9..2adc2cca10e92 100644 --- a/drivers/dma/ti/Kconfig +++ b/drivers/dma/ti/Kconfig @@ -56,7 +56,8 @@ config TI_K3_UDMA_GLUE_LAYER If unsure, say N. 
config TI_K3_PSIL - bool + tristate + default TI_K3_UDMA config TI_DMA_CROSSBAR bool diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile index d3a303f0d7c62..b53d05b11ca5e 100644 --- a/drivers/dma/ti/Makefile +++ b/drivers/dma/ti/Makefile @@ -4,11 +4,12 @@ obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_DMA_OMAP) += omap-dma.o obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o obj-$(CONFIG_TI_K3_UDMA_GLUE_LAYER) += k3-udma-glue.o -obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o \ - k3-psil-am654.o \ - k3-psil-j721e.o \ - k3-psil-j7200.o \ - k3-psil-am64.o \ - k3-psil-j721s2.o \ - k3-psil-am62.o +k3-psil-lib-objs := k3-psil.o \ + k3-psil-am654.o \ + k3-psil-j721e.o \ + k3-psil-j7200.o \ + k3-psil-am64.o \ + k3-psil-j721s2.o \ + k3-psil-am62.o +obj-$(CONFIG_TI_K3_PSIL) += k3-psil-lib.o obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c index 761a384093d20..8b6533a1eeeb9 100644 --- a/drivers/dma/ti/k3-psil.c +++ b/drivers/dma/ti/k3-psil.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -101,3 +102,4 @@ int psil_set_new_ep_config(struct device *dev, const char *name, return 0; } EXPORT_SYMBOL_GPL(psil_set_new_ep_config); +MODULE_LICENSE("GPL v2"); -- GitLab From 97c4cf380ff2d5a58ff13b9ac415ad998f623510 Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Wed, 19 Oct 2022 09:39:33 +0300 Subject: [PATCH 036/694] dt-bindings: ingenic: Add support for the JZ4755 dmaengine Update documentation prior to adding driver changes. Acked-by: Krzysztof Kozlowski Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221019063934.3278444-2-lis8215@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ingenic,dma.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml index 3b0b3b919af8c..e42b8ce948db1 100644 --- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml +++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml @@ -18,6 +18,7 @@ properties: - enum: - ingenic,jz4740-dma - ingenic,jz4725b-dma + - ingenic,jz4755-dma - ingenic,jz4760-dma - ingenic,jz4760-bdma - ingenic,jz4760-mdma -- GitLab From 042427ea0e415ea25468605f1b562f4ecec43541 Mon Sep 17 00:00:00 2001 From: Siarhei Volkau Date: Wed, 19 Oct 2022 09:39:34 +0300 Subject: [PATCH 037/694] dmaengine: JZ4780: Add support for the JZ4755. The JZ4755 has 4 DMA channels per DMA unit, two idential DMA units. The JZ4755 has the similar DMA engine to JZ4725b and it has the same bug as JZ4725b, see commit a40c94be2336. At least the JZ_SOC_DATA_BREAK_LINKS flag make it work much better, although not ideal. 
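The new entry follows the driver's usual pattern: the per-SoC parameters hang off the of_device_id table and are picked up at probe time, roughly like this (a sketch; the surrounding probe code is not part of this patch):

	/* probe fetches the matched SoC data, e.g.: */
	const struct jz4780_dma_soc_data *soc_data =
		device_get_match_data(&pdev->dev);

	if (!soc_data)
		return -EINVAL;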
Reviewed-by: Paul Cercueil Tested-by: Siarhei Volkau Signed-off-by: Siarhei Volkau Link: https://lore.kernel.org/r/20221019063934.3278444-3-lis8215@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dma-jz4780.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 2a483802d9eeb..9c1a6e9a9c030 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -1038,6 +1038,13 @@ static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = { JZ_SOC_DATA_BREAK_LINKS, }; +static const struct jz4780_dma_soc_data jz4755_dma_soc_data = { + .nb_channels = 4, + .transfer_ord_max = 5, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC | + JZ_SOC_DATA_BREAK_LINKS, +}; + static const struct jz4780_dma_soc_data jz4760_dma_soc_data = { .nb_channels = 5, .transfer_ord_max = 6, @@ -1101,6 +1108,7 @@ static const struct jz4780_dma_soc_data x1830_dma_soc_data = { static const struct of_device_id jz4780_dma_dt_match[] = { { .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data }, { .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data }, + { .compatible = "ingenic,jz4755-dma", .data = &jz4755_dma_soc_data }, { .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data }, { .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data }, { .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data }, -- GitLab From c3b63380f52a5cc945c092259c3545fb4915719d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 14 Oct 2022 18:12:50 +0200 Subject: [PATCH 038/694] dmaengine: idma64: Make idma64_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function idma64_remove() returns zero unconditionally. Make it return void. This is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221014161250.468687-1-u.kleine-koenig@pengutronix.de Signed-off-by: Vinod Koul --- drivers/dma/idma64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index f4c07ad3be15b..c33087c5cd021 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -600,7 +600,7 @@ static int idma64_probe(struct idma64_chip *chip) return 0; } -static int idma64_remove(struct idma64_chip *chip) +static void idma64_remove(struct idma64_chip *chip) { struct idma64 *idma64 = chip->idma64; unsigned short i; @@ -618,8 +618,6 @@ static int idma64_remove(struct idma64_chip *chip) tasklet_kill(&idma64c->vchan.task); } - - return 0; } /* ---------------------------------------------------------------------- */ @@ -664,7 +662,9 @@ static int idma64_platform_remove(struct platform_device *pdev) { struct idma64_chip *chip = platform_get_drvdata(pdev); - return idma64_remove(chip); + idma64_remove(chip); + + return 0; } static int __maybe_unused idma64_pm_suspend(struct device *dev) -- GitLab From 91123b37e8a99cc489d5bdcfebd1c25f29382504 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 1 Oct 2022 04:15:28 +0800 Subject: [PATCH 039/694] dmaengine: idxd: Make max batch size attributes in sysfs invisible for Intel IAA In current code, dev.max_batch_size and wq.max_batch_size attributes in sysfs are exposed to user to show or update the values. >From Intel IAA spec [1], Intel IAA does not support batch processing. So these sysfs attributes should not be supported on IAA device. 
Fix this issue by making the attributes of max_batch_size invisible in sysfs through is_visible() filter when the device is IAA. Add description in the ABI documentation to mention that the attributes are not visible when the device does not support batch. [1]: https://cdrdv2.intel.com/v1/dl/getContent/721858 Fixes: e7184b159dd3 ("dmaengine: idxd: add support for configurable max wq batch size") Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Xiaochen Shen Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20220930201528.18621-3-xiaochen.shen@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 2 ++ drivers/dma/idxd/sysfs.c | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 8e2c2c405db22..69e2d9155e0d9 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -22,6 +22,7 @@ Date: Oct 25, 2019 KernelVersion: 5.6.0 Contact: dmaengine@vger.kernel.org Description: The largest number of work descriptors in a batch. + It's not visible when the device does not support batch. What: /sys/bus/dsa/devices/dsa/max_work_queues_size Date: Oct 25, 2019 @@ -205,6 +206,7 @@ KernelVersion: 5.10.0 Contact: dmaengine@vger.kernel.org Description: The max batch size for this workqueue. Cannot exceed device max batch size. Configurable parameter. + It's not visible when the device does not support batch. What: /sys/bus/dsa/devices/wq./ats_disable Date: Nov 13, 2020 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index bdaccf9e04363..f30aad90537bc 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1233,6 +1233,14 @@ static bool idxd_wq_attr_op_config_invisible(struct attribute *attr, !idxd->hw.wq_cap.op_config; } +static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* Intel IAA does not support batch processing, make it invisible */ + return attr == &dev_attr_wq_max_batch_size.attr && + idxd->data->type == IDXD_TYPE_IAX; +} + static umode_t idxd_wq_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1243,6 +1251,9 @@ static umode_t idxd_wq_attr_visible(struct kobject *kobj, if (idxd_wq_attr_op_config_invisible(attr, idxd)) return 0; + if (idxd_wq_attr_max_batch_size_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1533,6 +1544,26 @@ static ssize_t cmd_status_store(struct device *dev, struct device_attribute *att } static DEVICE_ATTR_RW(cmd_status); +static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* Intel IAA does not support batch processing, make it invisible */ + return attr == &dev_attr_max_batch_size.attr && + idxd->data->type == IDXD_TYPE_IAX; +} + +static umode_t idxd_device_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (idxd_device_attr_max_batch_size_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static struct attribute *idxd_device_attributes[] = { &dev_attr_version.attr, &dev_attr_max_groups.attr, @@ -1560,6 +1591,7 @@ static struct attribute *idxd_device_attributes[] = { static const struct attribute_group idxd_device_attribute_group = { .attrs = idxd_device_attributes, + 
.is_visible = idxd_device_attr_visible, }; static const struct attribute_group *idxd_attribute_groups[] = { -- GitLab From 568aa6dd641f63166bb60d769e256789b3ac42d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Wed, 19 Oct 2022 15:23:24 +0200 Subject: [PATCH 040/694] dmaengine: apple-admac: Allocate cache SRAM to channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a previously unknown part of the controller interface: We have to assign SRAM carveouts to channels to store their in-flight samples in. So, obtain the size of the SRAM from a read-only register and divide it into 2K blocks for allocation to channels. The FIFO depths we configure will always fit into 2K. (This fixes audio artifacts during simultaneous playback/capture on multiple channels -- which looking back is fully accounted for by having had the caches in the DMA controller overlap in memory.) Fixes: b127315d9a78 ("dmaengine: apple-admac: Add Apple ADMAC driver") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20221019132324.8585-2-povik+lin@cutebit.org Signed-off-by: Vinod Koul --- drivers/dma/apple-admac.c | 102 +++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 317ca76ccafd2..8f27445994354 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -21,6 +21,12 @@ #define NCHANNELS_MAX 64 #define IRQ_NOUTPUTS 4 +/* + * For allocation purposes we split the cache + * memory into blocks of fixed size (given in bytes). + */ +#define SRAM_BLOCK 2048 + #define RING_WRITE_SLOT GENMASK(1, 0) #define RING_READ_SLOT GENMASK(5, 4) #define RING_FULL BIT(9) @@ -36,6 +42,9 @@ #define REG_TX_STOP 0x0004 #define REG_RX_START 0x0008 #define REG_RX_STOP 0x000c +#define REG_IMPRINT 0x0090 +#define REG_TX_SRAM_SIZE 0x0094 +#define REG_RX_SRAM_SIZE 0x0098 #define REG_CHAN_CTL(ch) (0x8000 + (ch) * 0x200) #define REG_CHAN_CTL_RST_RINGS BIT(0) @@ -53,7 +62,9 @@ #define BUS_WIDTH_FRAME_2_WORDS 0x10 #define BUS_WIDTH_FRAME_4_WORDS 0x20 -#define CHAN_BUFSIZE 0x8000 +#define REG_CHAN_SRAM_CARVEOUT(ch) (0x8050 + (ch) * 0x200) +#define CHAN_SRAM_CARVEOUT_SIZE GENMASK(31, 16) +#define CHAN_SRAM_CARVEOUT_BASE GENMASK(15, 0) #define REG_CHAN_FIFOCTL(ch) (0x8054 + (ch) * 0x200) #define CHAN_FIFOCTL_LIMIT GENMASK(31, 16) @@ -76,6 +87,8 @@ struct admac_chan { struct dma_chan chan; struct tasklet_struct tasklet; + u32 carveout; + spinlock_t lock; struct admac_tx *current_tx; int nperiod_acks; @@ -92,12 +105,24 @@ struct admac_chan { struct list_head to_free; }; +struct admac_sram { + u32 size; + /* + * SRAM_CARVEOUT has 16-bit fields, so the SRAM cannot be larger than + * 64K and a 32-bit bitfield over 2K blocks covers it. 
+ */ + u32 allocated; +}; + struct admac_data { struct dma_device dma; struct device *dev; __iomem void *base; struct reset_control *rstc; + struct mutex cache_alloc_lock; + struct admac_sram txcache, rxcache; + int irq; int irq_index; int nchannels; @@ -118,6 +143,60 @@ struct admac_tx { struct list_head node; }; +static int admac_alloc_sram_carveout(struct admac_data *ad, + enum dma_transfer_direction dir, + u32 *out) +{ + struct admac_sram *sram; + int i, ret = 0, nblocks; + + if (dir == DMA_MEM_TO_DEV) + sram = &ad->txcache; + else + sram = &ad->rxcache; + + mutex_lock(&ad->cache_alloc_lock); + + nblocks = sram->size / SRAM_BLOCK; + for (i = 0; i < nblocks; i++) + if (!(sram->allocated & BIT(i))) + break; + + if (i < nblocks) { + *out = FIELD_PREP(CHAN_SRAM_CARVEOUT_BASE, i * SRAM_BLOCK) | + FIELD_PREP(CHAN_SRAM_CARVEOUT_SIZE, SRAM_BLOCK); + sram->allocated |= BIT(i); + } else { + ret = -EBUSY; + } + + mutex_unlock(&ad->cache_alloc_lock); + + return ret; +} + +static void admac_free_sram_carveout(struct admac_data *ad, + enum dma_transfer_direction dir, + u32 carveout) +{ + struct admac_sram *sram; + u32 base = FIELD_GET(CHAN_SRAM_CARVEOUT_BASE, carveout); + int i; + + if (dir == DMA_MEM_TO_DEV) + sram = &ad->txcache; + else + sram = &ad->rxcache; + + if (WARN_ON(base >= sram->size)) + return; + + mutex_lock(&ad->cache_alloc_lock); + i = base / SRAM_BLOCK; + sram->allocated &= ~BIT(i); + mutex_unlock(&ad->cache_alloc_lock); +} + static void admac_modify(struct admac_data *ad, int reg, u32 mask, u32 val) { void __iomem *addr = ad->base + reg; @@ -466,15 +545,28 @@ static void admac_synchronize(struct dma_chan *chan) static int admac_alloc_chan_resources(struct dma_chan *chan) { struct admac_chan *adchan = to_admac_chan(chan); + struct admac_data *ad = adchan->host; + int ret; dma_cookie_init(&adchan->chan); + ret = admac_alloc_sram_carveout(ad, admac_chan_direction(adchan->no), + &adchan->carveout); + if (ret < 0) + return ret; + + writel_relaxed(adchan->carveout, + ad->base + REG_CHAN_SRAM_CARVEOUT(adchan->no)); return 0; } static void admac_free_chan_resources(struct dma_chan *chan) { + struct admac_chan *adchan = to_admac_chan(chan); + admac_terminate_all(chan); admac_synchronize(chan); + admac_free_sram_carveout(adchan->host, admac_chan_direction(adchan->no), + adchan->carveout); } static struct dma_chan *admac_dma_of_xlate(struct of_phandle_args *dma_spec, @@ -712,6 +804,7 @@ static int admac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ad); ad->dev = &pdev->dev; ad->nchannels = nchannels; + mutex_init(&ad->cache_alloc_lock); /* * The controller has 4 IRQ outputs. Try them all until @@ -801,6 +894,13 @@ static int admac_probe(struct platform_device *pdev) goto free_irq; } + ad->txcache.size = readl_relaxed(ad->base + REG_TX_SRAM_SIZE); + ad->rxcache.size = readl_relaxed(ad->base + REG_RX_SRAM_SIZE); + + dev_info(&pdev->dev, "Audio DMA Controller\n"); + dev_info(&pdev->dev, "imprint %x TX cache %u RX cache %u\n", + readl_relaxed(ad->base + REG_IMPRINT), ad->txcache.size, ad->rxcache.size); + return 0; free_irq: -- GitLab From cd0ab43ec91a6114ea309e9e72382fdb184e7b9a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Oct 2022 17:03:32 +0200 Subject: [PATCH 041/694] dmaengine: remove iop-adma driver The iop32x platform was removed, so this driver is no longer needed. 
Cc: Dan Williams Signed-off-by: Arnd Bergmann Acked-by: Dan Williams Link: https://lore.kernel.org/r/20221019150410.3851944-10-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 8 - drivers/dma/Makefile | 1 - drivers/dma/iop-adma.c | 1554 ---------------------------------------- drivers/dma/iop-adma.h | 914 ----------------------- 4 files changed, 2477 deletions(-) delete mode 100644 drivers/dma/iop-adma.c delete mode 100644 drivers/dma/iop-adma.h diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 7524b62a8870a..b73fc89ba8776 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -357,14 +357,6 @@ config INTEL_IOATDMA If unsure, say N. -config INTEL_IOP_ADMA - tristate "Intel IOP32x ADMA support" - depends on ARCH_IOP32X || COMPILE_TEST - select DMA_ENGINE - select ASYNC_TX_ENABLE_CHANNEL_SWITCH - help - Enable support for the Intel(R) IOP Series RAID engines. - config K3_DMA tristate "Hisilicon K3 DMA support" depends on ARCH_HI3xxx || ARCH_HISI || COMPILE_TEST diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 10f7d42410017..5b55ada052a7f 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -44,7 +44,6 @@ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o obj-$(CONFIG_INTEL_IDMA64) += idma64.o obj-$(CONFIG_INTEL_IOATDMA) += ioat/ obj-y += idxd/ -obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_K3_DMA) += k3dma.o obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c deleted file mode 100644 index 310b899d581fa..0000000000000 --- a/drivers/dma/iop-adma.c +++ /dev/null @@ -1,1554 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * offload engine driver for the Intel Xscale series of i/o processors - * Copyright © 2006, Intel Corporation. 
- */ - -/* - * This driver supports the asynchrounous DMA copy and RAID engines available - * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "iop-adma.h" -#include "dmaengine.h" - -#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common) -#define to_iop_adma_device(dev) \ - container_of(dev, struct iop_adma_device, common) -#define tx_to_iop_adma_slot(tx) \ - container_of(tx, struct iop_adma_desc_slot, async_tx) - -/** - * iop_adma_free_slots - flags descriptor slots for reuse - * @slot: Slot to free - * Caller must hold &iop_chan->lock while calling this function - */ -static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) -{ - int stride = slot->slots_per_op; - - while (stride--) { - slot->slots_per_op = 0; - slot = list_entry(slot->slot_node.next, - struct iop_adma_desc_slot, - slot_node); - } -} - -static dma_cookie_t -iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *iop_chan, dma_cookie_t cookie) -{ - struct dma_async_tx_descriptor *tx = &desc->async_tx; - - BUG_ON(tx->cookie < 0); - if (tx->cookie > 0) { - cookie = tx->cookie; - tx->cookie = 0; - - /* call the callback (must not sleep or submit new - * operations to this channel) - */ - dmaengine_desc_get_callback_invoke(tx, NULL); - - dma_descriptor_unmap(tx); - if (desc->group_head) - desc->group_head = NULL; - } - - /* run dependent operations */ - dma_run_dependencies(tx); - - return cookie; -} - -static int -iop_adma_clean_slot(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *iop_chan) -{ - /* the client is allowed to attach dependent operations - * until 'ack' is set - */ - if (!async_tx_test_ack(&desc->async_tx)) - return 0; - - /* leave the last descriptor in the chain - * so we can append to it - */ - if (desc->chain_node.next == &iop_chan->chain) - return 1; - - dev_dbg(iop_chan->device->common.dev, - "\tfree slot: %d slots_per_op: %d\n", - desc->idx, desc->slots_per_op); - - list_del(&desc->chain_node); - iop_adma_free_slots(desc); - - return 0; -} - -static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) -{ - struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL; - dma_cookie_t cookie = 0; - u32 current_desc = iop_chan_get_current_descriptor(iop_chan); - int busy = iop_chan_is_busy(iop_chan); - int seen_current = 0, slot_cnt = 0, slots_per_op = 0; - - dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); - /* free completed slots from the chain starting with - * the oldest descriptor - */ - list_for_each_entry_safe(iter, _iter, &iop_chan->chain, - chain_node) { - pr_debug("\tcookie: %d slot: %d busy: %d " - "this_desc: %pad next_desc: %#llx ack: %d\n", - iter->async_tx.cookie, iter->idx, busy, - &iter->async_tx.phys, (u64)iop_desc_get_next_desc(iter), - async_tx_test_ack(&iter->async_tx)); - prefetch(_iter); - prefetch(&_iter->async_tx); - - /* do not advance past the current descriptor loaded into the - * hardware channel, subsequent descriptors are either in - * process or have not been submitted - */ - if (seen_current) - break; - - /* stop the search if we reach the current descriptor and the - * channel is busy, or if it appears that the current descriptor - * needs to be re-read (i.e. 
has been appended to) - */ - if (iter->async_tx.phys == current_desc) { - BUG_ON(seen_current++); - if (busy || iop_desc_get_next_desc(iter)) - break; - } - - /* detect the start of a group transaction */ - if (!slot_cnt && !slots_per_op) { - slot_cnt = iter->slot_cnt; - slots_per_op = iter->slots_per_op; - if (slot_cnt <= slots_per_op) { - slot_cnt = 0; - slots_per_op = 0; - } - } - - if (slot_cnt) { - pr_debug("\tgroup++\n"); - if (!grp_start) - grp_start = iter; - slot_cnt -= slots_per_op; - } - - /* all the members of a group are complete */ - if (slots_per_op != 0 && slot_cnt == 0) { - struct iop_adma_desc_slot *grp_iter, *_grp_iter; - int end_of_chain = 0; - pr_debug("\tgroup end\n"); - - /* collect the total results */ - if (grp_start->xor_check_result) { - u32 zero_sum_result = 0; - slot_cnt = grp_start->slot_cnt; - grp_iter = grp_start; - - list_for_each_entry_from(grp_iter, - &iop_chan->chain, chain_node) { - zero_sum_result |= - iop_desc_get_zero_result(grp_iter); - pr_debug("\titer%d result: %d\n", - grp_iter->idx, zero_sum_result); - slot_cnt -= slots_per_op; - if (slot_cnt == 0) - break; - } - pr_debug("\tgrp_start->xor_check_result: %p\n", - grp_start->xor_check_result); - *grp_start->xor_check_result = zero_sum_result; - } - - /* clean up the group */ - slot_cnt = grp_start->slot_cnt; - grp_iter = grp_start; - list_for_each_entry_safe_from(grp_iter, _grp_iter, - &iop_chan->chain, chain_node) { - cookie = iop_adma_run_tx_complete_actions( - grp_iter, iop_chan, cookie); - - slot_cnt -= slots_per_op; - end_of_chain = iop_adma_clean_slot(grp_iter, - iop_chan); - - if (slot_cnt == 0 || end_of_chain) - break; - } - - /* the group should be complete at this point */ - BUG_ON(slot_cnt); - - slots_per_op = 0; - grp_start = NULL; - if (end_of_chain) - break; - else - continue; - } else if (slots_per_op) /* wait for group completion */ - continue; - - /* write back zero sum results (single descriptor case) */ - if (iter->xor_check_result && iter->async_tx.cookie) - *iter->xor_check_result = - iop_desc_get_zero_result(iter); - - cookie = iop_adma_run_tx_complete_actions( - iter, iop_chan, cookie); - - if (iop_adma_clean_slot(iter, iop_chan)) - break; - } - - if (cookie > 0) { - iop_chan->common.completed_cookie = cookie; - pr_debug("\tcompleted cookie %d\n", cookie); - } -} - -static void -iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) -{ - spin_lock_bh(&iop_chan->lock); - __iop_adma_slot_cleanup(iop_chan); - spin_unlock_bh(&iop_chan->lock); -} - -static void iop_adma_tasklet(struct tasklet_struct *t) -{ - struct iop_adma_chan *iop_chan = from_tasklet(iop_chan, t, - irq_tasklet); - - /* lockdep will flag depedency submissions as potentially - * recursive locking, this is not the case as a dependency - * submission will never recurse a channels submit routine. - * There are checks in async_tx.c to prevent this. 
- */ - spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING); - __iop_adma_slot_cleanup(iop_chan); - spin_unlock(&iop_chan->lock); -} - -static struct iop_adma_desc_slot * -iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots, - int slots_per_op) -{ - struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL; - LIST_HEAD(chain); - int slots_found, retry = 0; - - /* start search from the last allocated descrtiptor - * if a contiguous allocation can not be found start searching - * from the beginning of the list - */ -retry: - slots_found = 0; - if (retry == 0) - iter = iop_chan->last_used; - else - iter = list_entry(&iop_chan->all_slots, - struct iop_adma_desc_slot, - slot_node); - - list_for_each_entry_safe_continue( - iter, _iter, &iop_chan->all_slots, slot_node) { - prefetch(_iter); - prefetch(&_iter->async_tx); - if (iter->slots_per_op) { - /* give up after finding the first busy slot - * on the second pass through the list - */ - if (retry) - break; - - slots_found = 0; - continue; - } - - /* start the allocation if the slot is correctly aligned */ - if (!slots_found++) { - if (iop_desc_is_aligned(iter, slots_per_op)) - alloc_start = iter; - else { - slots_found = 0; - continue; - } - } - - if (slots_found == num_slots) { - struct iop_adma_desc_slot *alloc_tail = NULL; - struct iop_adma_desc_slot *last_used = NULL; - iter = alloc_start; - while (num_slots) { - int i; - dev_dbg(iop_chan->device->common.dev, - "allocated slot: %d " - "(desc %p phys: %#llx) slots_per_op %d\n", - iter->idx, iter->hw_desc, - (u64)iter->async_tx.phys, slots_per_op); - - /* pre-ack all but the last descriptor */ - if (num_slots != slots_per_op) - async_tx_ack(&iter->async_tx); - - list_add_tail(&iter->chain_node, &chain); - alloc_tail = iter; - iter->async_tx.cookie = 0; - iter->slot_cnt = num_slots; - iter->xor_check_result = NULL; - for (i = 0; i < slots_per_op; i++) { - iter->slots_per_op = slots_per_op - i; - last_used = iter; - iter = list_entry(iter->slot_node.next, - struct iop_adma_desc_slot, - slot_node); - } - num_slots -= slots_per_op; - } - alloc_tail->group_head = alloc_start; - alloc_tail->async_tx.cookie = -EBUSY; - list_splice(&chain, &alloc_tail->tx_list); - iop_chan->last_used = last_used; - iop_desc_clear_next_desc(alloc_start); - iop_desc_clear_next_desc(alloc_tail); - return alloc_tail; - } - } - if (!retry++) - goto retry; - - /* perform direct reclaim if the allocation fails */ - __iop_adma_slot_cleanup(iop_chan); - - return NULL; -} - -static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan) -{ - dev_dbg(iop_chan->device->common.dev, "pending: %d\n", - iop_chan->pending); - - if (iop_chan->pending >= IOP_ADMA_THRESHOLD) { - iop_chan->pending = 0; - iop_chan_append(iop_chan); - } -} - -static dma_cookie_t -iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); - struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); - struct iop_adma_desc_slot *grp_start, *old_chain_tail; - int slot_cnt; - dma_cookie_t cookie; - dma_addr_t next_dma; - - grp_start = sw_desc->group_head; - slot_cnt = grp_start->slot_cnt; - - spin_lock_bh(&iop_chan->lock); - cookie = dma_cookie_assign(tx); - - old_chain_tail = list_entry(iop_chan->chain.prev, - struct iop_adma_desc_slot, chain_node); - list_splice_init(&sw_desc->tx_list, - &old_chain_tail->chain_node); - - /* fix up the hardware chain */ - next_dma = grp_start->async_tx.phys; - iop_desc_set_next_desc(old_chain_tail, next_dma); - 
BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */ - - /* check for pre-chained descriptors */ - iop_paranoia(iop_desc_get_next_desc(sw_desc)); - - /* increment the pending count by the number of slots - * memcpy operations have a 1:1 (slot:operation) relation - * other operations are heavier and will pop the threshold - * more often. - */ - iop_chan->pending += slot_cnt; - iop_adma_check_threshold(iop_chan); - spin_unlock_bh(&iop_chan->lock); - - dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n", - __func__, sw_desc->async_tx.cookie, sw_desc->idx); - - return cookie; -} - -static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); -static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); - -/** - * iop_adma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: allocate descriptor resources for this channel - * - * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To - * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be - * greater than 2x the number slots needed to satisfy a device->max_xor - * request. - * */ -static int iop_adma_alloc_chan_resources(struct dma_chan *chan) -{ - char *hw_desc; - dma_addr_t dma_desc; - int idx; - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *slot = NULL; - int init = iop_chan->slots_allocated ? 0 : 1; - struct iop_adma_platform_data *plat_data = - dev_get_platdata(&iop_chan->device->pdev->dev); - int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE; - - /* Allocate descriptor slots */ - do { - idx = iop_chan->slots_allocated; - if (idx == num_descs_in_pool) - break; - - slot = kzalloc(sizeof(*slot), GFP_KERNEL); - if (!slot) { - printk(KERN_INFO "IOP ADMA Channel only initialized" - " %d descriptor slots", idx); - break; - } - hw_desc = (char *) iop_chan->device->dma_desc_pool_virt; - slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; - - dma_async_tx_descriptor_init(&slot->async_tx, chan); - slot->async_tx.tx_submit = iop_adma_tx_submit; - INIT_LIST_HEAD(&slot->tx_list); - INIT_LIST_HEAD(&slot->chain_node); - INIT_LIST_HEAD(&slot->slot_node); - dma_desc = iop_chan->device->dma_desc_pool; - slot->async_tx.phys = dma_desc + idx * IOP_ADMA_SLOT_SIZE; - slot->idx = idx; - - spin_lock_bh(&iop_chan->lock); - iop_chan->slots_allocated++; - list_add_tail(&slot->slot_node, &iop_chan->all_slots); - spin_unlock_bh(&iop_chan->lock); - } while (iop_chan->slots_allocated < num_descs_in_pool); - - if (idx && !iop_chan->last_used) - iop_chan->last_used = list_entry(iop_chan->all_slots.next, - struct iop_adma_desc_slot, - slot_node); - - dev_dbg(iop_chan->device->common.dev, - "allocated %d descriptor slots last_used: %p\n", - iop_chan->slots_allocated, iop_chan->last_used); - - /* initialize the channel and the chain with a null operation */ - if (init) { - if (dma_has_cap(DMA_MEMCPY, - iop_chan->device->common.cap_mask)) - iop_chan_start_null_memcpy(iop_chan); - else if (dma_has_cap(DMA_XOR, - iop_chan->device->common.cap_mask)) - iop_chan_start_null_xor(iop_chan); - else - BUG(); - } - - return (idx > 0) ? 
idx : -ENOMEM; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_interrupt(grp_start, iop_chan); - sw_desc->async_tx.flags = flags; - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? &sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, "%s len: %zu\n", - __func__, len); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_memcpy(grp_start, flags); - iop_desc_set_byte_count(grp_start, iop_chan, len); - iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); - iop_desc_set_memcpy_src_addr(grp_start, dma_src); - sw_desc->async_tx.flags = flags; - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? &sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, - dma_addr_t *dma_src, unsigned int src_cnt, size_t len, - unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, - "%s src_cnt: %d len: %zu flags: %lx\n", - __func__, src_cnt, len, flags); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_xor(grp_start, src_cnt, flags); - iop_desc_set_byte_count(grp_start, iop_chan, len); - iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); - sw_desc->async_tx.flags = flags; - while (src_cnt--) - iop_desc_set_xor_src_addr(grp_start, src_cnt, - dma_src[src_cnt]); - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? 
&sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, - unsigned int src_cnt, size_t len, u32 *result, - unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *grp_start; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - - dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n", - __func__, src_cnt, len); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - iop_desc_init_zero_sum(grp_start, src_cnt, flags); - iop_desc_set_zero_sum_byte_count(grp_start, len); - grp_start->xor_check_result = result; - pr_debug("\t%s: grp_start->xor_check_result: %p\n", - __func__, grp_start->xor_check_result); - sw_desc->async_tx.flags = flags; - while (src_cnt--) - iop_desc_set_zero_sum_src_addr(grp_start, src_cnt, - dma_src[src_cnt]); - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? &sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *g; - int slot_cnt, slots_per_op; - int continue_srcs; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, - "%s src_cnt: %d len: %zu flags: %lx\n", - __func__, src_cnt, len, flags); - - if (dmaf_p_disabled_continue(flags)) - continue_srcs = 1+src_cnt; - else if (dmaf_continue(flags)) - continue_srcs = 3+src_cnt; - else - continue_srcs = 0+src_cnt; - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - int i; - - g = sw_desc->group_head; - iop_desc_set_byte_count(g, iop_chan, len); - - /* even if P is disabled its destination address (bits - * [3:0]) must match Q. It is ok if P points to an - * invalid address, it won't be written. - */ - if (flags & DMA_PREP_PQ_DISABLE_P) - dst[0] = dst[1] & 0x7; - - iop_desc_set_pq_addr(g, dst); - sw_desc->async_tx.flags = flags; - for (i = 0; i < src_cnt; i++) - iop_desc_set_pq_src_addr(g, i, src[i], scf[i]); - - /* if we are continuing a previous operation factor in - * the old p and q values, see the comment for dma_maxpq - * in include/linux/dmaengine.h - */ - if (dmaf_p_disabled_continue(flags)) - iop_desc_set_pq_src_addr(g, i++, dst[1], 1); - else if (dmaf_continue(flags)) { - iop_desc_set_pq_src_addr(g, i++, dst[0], 0); - iop_desc_set_pq_src_addr(g, i++, dst[1], 1); - iop_desc_set_pq_src_addr(g, i++, dst[1], 0); - } - iop_desc_init_pq(g, i, flags); - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? 
&sw_desc->async_tx : NULL; -} - -static struct dma_async_tx_descriptor * -iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, enum sum_check_flags *pqres, - unsigned long flags) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *sw_desc, *g; - int slot_cnt, slots_per_op; - - if (unlikely(!len)) - return NULL; - BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); - - dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n", - __func__, src_cnt, len); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - /* for validate operations p and q are tagged onto the - * end of the source list - */ - int pq_idx = src_cnt; - - g = sw_desc->group_head; - iop_desc_init_pq_zero_sum(g, src_cnt+2, flags); - iop_desc_set_pq_zero_sum_byte_count(g, len); - g->pq_check_result = pqres; - pr_debug("\t%s: g->pq_check_result: %p\n", - __func__, g->pq_check_result); - sw_desc->async_tx.flags = flags; - while (src_cnt--) - iop_desc_set_pq_zero_sum_src_addr(g, src_cnt, - src[src_cnt], - scf[src_cnt]); - iop_desc_set_pq_zero_sum_addr(g, pq_idx, src); - } - spin_unlock_bh(&iop_chan->lock); - - return sw_desc ? &sw_desc->async_tx : NULL; -} - -static void iop_adma_free_chan_resources(struct dma_chan *chan) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - struct iop_adma_desc_slot *iter, *_iter; - int in_use_descs = 0; - - iop_adma_slot_cleanup(iop_chan); - - spin_lock_bh(&iop_chan->lock); - list_for_each_entry_safe(iter, _iter, &iop_chan->chain, - chain_node) { - in_use_descs++; - list_del(&iter->chain_node); - } - list_for_each_entry_safe_reverse( - iter, _iter, &iop_chan->all_slots, slot_node) { - list_del(&iter->slot_node); - kfree(iter); - iop_chan->slots_allocated--; - } - iop_chan->last_used = NULL; - - dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n", - __func__, iop_chan->slots_allocated); - spin_unlock_bh(&iop_chan->lock); - - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n", - in_use_descs - 1); -} - -/** - * iop_adma_status - poll the status of an ADMA transaction - * @chan: ADMA channel handle - * @cookie: ADMA transaction identifier - * @txstate: a holder for the current state of the channel or NULL - */ -static enum dma_status iop_adma_status(struct dma_chan *chan, - dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - int ret; - - ret = dma_cookie_status(chan, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; - - iop_adma_slot_cleanup(iop_chan); - - return dma_cookie_status(chan, cookie, txstate); -} - -static irqreturn_t iop_adma_eot_handler(int irq, void *data) -{ - struct iop_adma_chan *chan = data; - - dev_dbg(chan->device->common.dev, "%s\n", __func__); - - tasklet_schedule(&chan->irq_tasklet); - - iop_adma_device_clear_eot_status(chan); - - return IRQ_HANDLED; -} - -static irqreturn_t iop_adma_eoc_handler(int irq, void *data) -{ - struct iop_adma_chan *chan = data; - - dev_dbg(chan->device->common.dev, "%s\n", __func__); - - tasklet_schedule(&chan->irq_tasklet); - - iop_adma_device_clear_eoc_status(chan); - - return IRQ_HANDLED; -} - -static irqreturn_t iop_adma_err_handler(int irq, void *data) -{ - struct iop_adma_chan *chan = data; - 
unsigned long status = iop_chan_get_status(chan); - - dev_err(chan->device->common.dev, - "error ( %s%s%s%s%s%s%s)\n", - iop_is_err_int_parity(status, chan) ? "int_parity " : "", - iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "", - iop_is_err_int_tabort(status, chan) ? "int_tabort " : "", - iop_is_err_int_mabort(status, chan) ? "int_mabort " : "", - iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "", - iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "", - iop_is_err_split_tx(status, chan) ? "split_tx " : ""); - - iop_adma_device_clear_err_status(chan); - - BUG(); - - return IRQ_HANDLED; -} - -static void iop_adma_issue_pending(struct dma_chan *chan) -{ - struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); - - if (iop_chan->pending) { - iop_chan->pending = 0; - iop_chan_append(iop_chan); - } -} - -/* - * Perform a transaction to verify the HW works. - */ -#define IOP_ADMA_TEST_SIZE 2000 - -static int iop_adma_memcpy_self_test(struct iop_adma_device *device) -{ - int i; - void *src, *dest; - dma_addr_t src_dma, dest_dma; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - struct dma_async_tx_descriptor *tx; - int err = 0; - struct iop_adma_chan *iop_chan; - - dev_dbg(device->common.dev, "%s\n", __func__); - - src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } - - /* Fill in src buffer */ - for (i = 0; i < IOP_ADMA_TEST_SIZE; i++) - ((u8 *) src)[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(device->common.channels.next, - struct dma_chan, - device_node); - if (iop_adma_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - dest_dma = dma_map_single(dma_chan->device->dev, dest, - IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); - src_dma = dma_map_single(dma_chan->device->dev, src, - IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma, - IOP_ADMA_TEST_SIZE, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(1); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - dev_err(dma_chan->device->dev, - "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - iop_chan = to_iop_adma_chan(dma_chan); - dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, - IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); - if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) { - dev_err(dma_chan->device->dev, - "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - -free_resources: - iop_adma_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ -static int -iop_adma_xor_val_self_test(struct iop_adma_device *device) -{ - int i, src_idx; - struct page *dest; - struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST]; - struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; - dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; - dma_addr_t dest_dma; - struct dma_async_tx_descriptor *tx; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - u8 cmp_byte = 0; - u32 cmp_word; - u32 zero_sum_result; - int err = 0; - struct iop_adma_chan *iop_chan; - - dev_dbg(device->common.dev, "%s\n", __func__); - - for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { - xor_srcs[src_idx] = alloc_page(GFP_KERNEL); - if (!xor_srcs[src_idx]) { - while (src_idx--) - 
__free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - } - - dest = alloc_page(GFP_KERNEL); - if (!dest) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - - /* Fill in src buffers */ - for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { - u8 *ptr = page_address(xor_srcs[src_idx]); - for (i = 0; i < PAGE_SIZE; i++) - ptr[i] = (1 << src_idx); - } - - for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) - cmp_byte ^= (u8) (1 << src_idx); - - cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | - (cmp_byte << 8) | cmp_byte; - - memset(page_address(dest), 0, PAGE_SIZE); - - dma_chan = container_of(device->common.channels.next, - struct dma_chan, - device_node); - if (iop_adma_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - /* test xor */ - dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, - PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) - dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], - 0, PAGE_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs, - IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - dev_err(dma_chan->device->dev, - "Self-test xor timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - iop_chan = to_iop_adma_chan(dma_chan); - dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, - PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { - u32 *ptr = page_address(dest); - if (ptr[i] != cmp_word) { - dev_err(dma_chan->device->dev, - "Self-test xor failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - } - dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma, - PAGE_SIZE, DMA_TO_DEVICE); - - /* skip zero sum if the capability is not present */ - if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) - goto free_resources; - - /* zero sum the sources with the destintation page */ - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) - zero_sum_srcs[i] = xor_srcs[i]; - zero_sum_srcs[i] = dest; - - zero_sum_result = 1; - - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = dma_map_page(dma_chan->device->dev, - zero_sum_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dma_chan->device->dev, - "Self-test zero sum timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - if (zero_sum_result != 0) { - dev_err(dma_chan->device->dev, - "Self-test zero sum failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - /* test for non-zero parity sum */ - zero_sum_result = 0; - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = dma_map_page(dma_chan->device->dev, - zero_sum_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - 
dev_err(dma_chan->device->dev, - "Self-test non-zero sum timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - if (zero_sum_result != 1) { - dev_err(dma_chan->device->dev, - "Self-test non-zero sum failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - -free_resources: - iop_adma_free_chan_resources(dma_chan); -out: - src_idx = IOP_ADMA_NUM_SRC_TEST; - while (src_idx--) - __free_page(xor_srcs[src_idx]); - __free_page(dest); - return err; -} - -#ifdef CONFIG_RAID6_PQ -static int -iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) -{ - /* combined sources, software pq results, and extra hw pq results */ - struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2]; - /* ptr to the extra hw pq buffers defined above */ - struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2]; - /* address conversion buffers (dma_map / page_address) */ - void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2]; - dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2]; - dma_addr_t *pq_dest = &pq_src[IOP_ADMA_NUM_SRC_TEST]; - - int i; - struct dma_async_tx_descriptor *tx; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - u32 zero_sum_result; - int err = 0; - struct device *dev; - - dev_dbg(device->common.dev, "%s\n", __func__); - - for (i = 0; i < ARRAY_SIZE(pq); i++) { - pq[i] = alloc_page(GFP_KERNEL); - if (!pq[i]) { - while (i--) - __free_page(pq[i]); - return -ENOMEM; - } - } - - /* Fill in src buffers */ - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { - pq_sw[i] = page_address(pq[i]); - memset(pq_sw[i], 0x11111111 * (1<common.channels.next, - struct dma_chan, - device_node); - if (iop_adma_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - dev = dma_chan->device->dev; - - /* initialize the dests */ - memset(page_address(pq_hw[0]), 0 , PAGE_SIZE); - memset(page_address(pq_hw[1]), 0 , PAGE_SIZE); - - /* test pq */ - pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE); - pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) - pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - - tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src, - IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp, - PAGE_SIZE, - DMA_PREP_INTERRUPT | - DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - dev_err(dev, "Self-test pq timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw); - - if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST], - page_address(pq_hw[0]), PAGE_SIZE) != 0) { - dev_err(dev, "Self-test p failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1], - page_address(pq_hw[1]), PAGE_SIZE) != 0) { - dev_err(dev, "Self-test q failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - /* test correct zero sum using the software generated pq values */ - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) - pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - - zero_sum_result = ~0; - tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], - pq_src, IOP_ADMA_NUM_SRC_TEST, - raid6_gfexp, PAGE_SIZE, &zero_sum_result, - DMA_PREP_INTERRUPT|DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - 
dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - if (zero_sum_result != 0) { - dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n", - zero_sum_result); - err = -ENODEV; - goto free_resources; - } - - /* test incorrect zero sum */ - i = IOP_ADMA_NUM_SRC_TEST; - memset(pq_sw[i] + 100, 0, 100); - memset(pq_sw[i+1] + 200, 0, 200); - for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) - pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - - zero_sum_result = 0; - tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], - pq_src, IOP_ADMA_NUM_SRC_TEST, - raid6_gfexp, PAGE_SIZE, &zero_sum_result, - DMA_PREP_INTERRUPT|DMA_CTRL_ACK); - - cookie = iop_adma_tx_submit(tx); - iop_adma_issue_pending(dma_chan); - msleep(8); - - if (iop_adma_status(dma_chan, cookie, NULL) != - DMA_COMPLETE) { - dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) { - dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n", - zero_sum_result); - err = -ENODEV; - goto free_resources; - } - -free_resources: - iop_adma_free_chan_resources(dma_chan); -out: - i = ARRAY_SIZE(pq); - while (i--) - __free_page(pq[i]); - return err; -} -#endif - -static int iop_adma_remove(struct platform_device *dev) -{ - struct iop_adma_device *device = platform_get_drvdata(dev); - struct dma_chan *chan, *_chan; - struct iop_adma_chan *iop_chan; - struct iop_adma_platform_data *plat_data = dev_get_platdata(&dev->dev); - - dma_async_device_unregister(&device->common); - - dma_free_coherent(&dev->dev, plat_data->pool_size, - device->dma_desc_pool_virt, device->dma_desc_pool); - - list_for_each_entry_safe(chan, _chan, &device->common.channels, - device_node) { - iop_chan = to_iop_adma_chan(chan); - list_del(&chan->device_node); - kfree(iop_chan); - } - kfree(device); - - return 0; -} - -static int iop_adma_probe(struct platform_device *pdev) -{ - struct resource *res; - int ret = 0, i; - struct iop_adma_device *adev; - struct iop_adma_chan *iop_chan; - struct dma_device *dma_dev; - struct iop_adma_platform_data *plat_data = dev_get_platdata(&pdev->dev); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENODEV; - - if (!devm_request_mem_region(&pdev->dev, res->start, - resource_size(res), pdev->name)) - return -EBUSY; - - adev = kzalloc(sizeof(*adev), GFP_KERNEL); - if (!adev) - return -ENOMEM; - dma_dev = &adev->common; - - /* allocate coherent memory for hardware descriptors - * note: writecombine gives slightly better performance, but - * requires that we explicitly flush the writes - */ - adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev, - plat_data->pool_size, - &adev->dma_desc_pool, - GFP_KERNEL); - if (!adev->dma_desc_pool_virt) { - ret = -ENOMEM; - goto err_free_adev; - } - - dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %pad\n", - __func__, adev->dma_desc_pool_virt, &adev->dma_desc_pool); - - adev->id = plat_data->hw_id; - - /* discover transaction capabilites from the platform data */ - dma_dev->cap_mask = plat_data->cap_mask; - - adev->pdev = pdev; - platform_set_drvdata(pdev, adev); - - INIT_LIST_HEAD(&dma_dev->channels); - - /* set base routines */ - dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources; - dma_dev->device_free_chan_resources = iop_adma_free_chan_resources; - dma_dev->device_tx_status = iop_adma_status; - 
dma_dev->device_issue_pending = iop_adma_issue_pending; - dma_dev->dev = &pdev->dev; - - /* set prep routines based on capability */ - if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) - dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy; - if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - dma_dev->max_xor = iop_adma_get_max_xor(); - dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; - } - if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) - dma_dev->device_prep_dma_xor_val = - iop_adma_prep_dma_xor_val; - if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { - dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0); - dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq; - } - if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) - dma_dev->device_prep_dma_pq_val = - iop_adma_prep_dma_pq_val; - if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) - dma_dev->device_prep_dma_interrupt = - iop_adma_prep_dma_interrupt; - - iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL); - if (!iop_chan) { - ret = -ENOMEM; - goto err_free_dma; - } - iop_chan->device = adev; - - iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); - if (!iop_chan->mmr_base) { - ret = -ENOMEM; - goto err_free_iop_chan; - } - tasklet_setup(&iop_chan->irq_tasklet, iop_adma_tasklet); - - /* clear errors before enabling interrupts */ - iop_adma_device_clear_err_status(iop_chan); - - for (i = 0; i < 3; i++) { - static const irq_handler_t handler[] = { - iop_adma_eot_handler, - iop_adma_eoc_handler, - iop_adma_err_handler - }; - int irq = platform_get_irq(pdev, i); - if (irq < 0) { - ret = -ENXIO; - goto err_free_iop_chan; - } else { - ret = devm_request_irq(&pdev->dev, irq, - handler[i], 0, pdev->name, iop_chan); - if (ret) - goto err_free_iop_chan; - } - } - - spin_lock_init(&iop_chan->lock); - INIT_LIST_HEAD(&iop_chan->chain); - INIT_LIST_HEAD(&iop_chan->all_slots); - iop_chan->common.device = dma_dev; - dma_cookie_init(&iop_chan->common); - list_add_tail(&iop_chan->common.device_node, &dma_dev->channels); - - if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { - ret = iop_adma_memcpy_self_test(adev); - dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); - if (ret) - goto err_free_iop_chan; - } - - if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - ret = iop_adma_xor_val_self_test(adev); - dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); - if (ret) - goto err_free_iop_chan; - } - - if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) && - dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) { - #ifdef CONFIG_RAID6_PQ - ret = iop_adma_pq_zero_sum_self_test(adev); - dev_dbg(&pdev->dev, "pq self test returned %d\n", ret); - #else - /* can not test raid6, so do not publish capability */ - dma_cap_clear(DMA_PQ, dma_dev->cap_mask); - dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask); - ret = 0; - #endif - if (ret) - goto err_free_iop_chan; - } - - dev_info(&pdev->dev, "Intel(R) IOP: ( %s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", - dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", - dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", - dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", - dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", - dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? 
"intr " : ""); - - dma_async_device_register(dma_dev); - goto out; - - err_free_iop_chan: - kfree(iop_chan); - err_free_dma: - dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, - adev->dma_desc_pool_virt, adev->dma_desc_pool); - err_free_adev: - kfree(adev); - out: - return ret; -} - -static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) -{ - struct iop_adma_desc_slot *sw_desc, *grp_start; - dma_cookie_t cookie; - int slot_cnt, slots_per_op; - - dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - - list_splice_init(&sw_desc->tx_list, &iop_chan->chain); - async_tx_ack(&sw_desc->async_tx); - iop_desc_init_memcpy(grp_start, 0); - iop_desc_set_byte_count(grp_start, iop_chan, 0); - iop_desc_set_dest_addr(grp_start, iop_chan, 0); - iop_desc_set_memcpy_src_addr(grp_start, 0); - - cookie = dma_cookie_assign(&sw_desc->async_tx); - - /* initialize the completed cookie to be less than - * the most recently used cookie - */ - iop_chan->common.completed_cookie = cookie - 1; - - /* channel should not be busy */ - BUG_ON(iop_chan_is_busy(iop_chan)); - - /* clear any prior error-status bits */ - iop_adma_device_clear_err_status(iop_chan); - - /* disable operation */ - iop_chan_disable(iop_chan); - - /* set the descriptor address */ - iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); - - /* 1/ don't add pre-chained descriptors - * 2/ dummy read to flush next_desc write - */ - BUG_ON(iop_desc_get_next_desc(sw_desc)); - - /* run the descriptor */ - iop_chan_enable(iop_chan); - } else - dev_err(iop_chan->device->common.dev, - "failed to allocate null descriptor\n"); - spin_unlock_bh(&iop_chan->lock); -} - -static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) -{ - struct iop_adma_desc_slot *sw_desc, *grp_start; - dma_cookie_t cookie; - int slot_cnt, slots_per_op; - - dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); - - spin_lock_bh(&iop_chan->lock); - slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op); - sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); - if (sw_desc) { - grp_start = sw_desc->group_head; - list_splice_init(&sw_desc->tx_list, &iop_chan->chain); - async_tx_ack(&sw_desc->async_tx); - iop_desc_init_null_xor(grp_start, 2, 0); - iop_desc_set_byte_count(grp_start, iop_chan, 0); - iop_desc_set_dest_addr(grp_start, iop_chan, 0); - iop_desc_set_xor_src_addr(grp_start, 0, 0); - iop_desc_set_xor_src_addr(grp_start, 1, 0); - - cookie = dma_cookie_assign(&sw_desc->async_tx); - - /* initialize the completed cookie to be less than - * the most recently used cookie - */ - iop_chan->common.completed_cookie = cookie - 1; - - /* channel should not be busy */ - BUG_ON(iop_chan_is_busy(iop_chan)); - - /* clear any prior error-status bits */ - iop_adma_device_clear_err_status(iop_chan); - - /* disable operation */ - iop_chan_disable(iop_chan); - - /* set the descriptor address */ - iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); - - /* 1/ don't add pre-chained descriptors - * 2/ dummy read to flush next_desc write - */ - BUG_ON(iop_desc_get_next_desc(sw_desc)); - - /* run the descriptor */ - iop_chan_enable(iop_chan); - } else - dev_err(iop_chan->device->common.dev, - "failed to allocate null descriptor\n"); - spin_unlock_bh(&iop_chan->lock); -} - -static struct platform_driver iop_adma_driver = { - 
.probe = iop_adma_probe, - .remove = iop_adma_remove, - .driver = { - .name = "iop-adma", - }, -}; - -module_platform_driver(iop_adma_driver); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("IOP ADMA Engine Driver"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:iop-adma"); diff --git a/drivers/dma/iop-adma.h b/drivers/dma/iop-adma.h deleted file mode 100644 index d44eabb6f5ebd..0000000000000 --- a/drivers/dma/iop-adma.h +++ /dev/null @@ -1,914 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2006, Intel Corporation. - */ -#ifndef _ADMA_H -#define _ADMA_H -#include -#include -#include - -/* Memory copy units */ -#define DMA_CCR(chan) (chan->mmr_base + 0x0) -#define DMA_CSR(chan) (chan->mmr_base + 0x4) -#define DMA_DAR(chan) (chan->mmr_base + 0xc) -#define DMA_NDAR(chan) (chan->mmr_base + 0x10) -#define DMA_PADR(chan) (chan->mmr_base + 0x14) -#define DMA_PUADR(chan) (chan->mmr_base + 0x18) -#define DMA_LADR(chan) (chan->mmr_base + 0x1c) -#define DMA_BCR(chan) (chan->mmr_base + 0x20) -#define DMA_DCR(chan) (chan->mmr_base + 0x24) - -/* Application accelerator unit */ -#define AAU_ACR(chan) (chan->mmr_base + 0x0) -#define AAU_ASR(chan) (chan->mmr_base + 0x4) -#define AAU_ADAR(chan) (chan->mmr_base + 0x8) -#define AAU_ANDAR(chan) (chan->mmr_base + 0xc) -#define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2))) -#define AAU_DAR(chan) (chan->mmr_base + 0x20) -#define AAU_ABCR(chan) (chan->mmr_base + 0x24) -#define AAU_ADCR(chan) (chan->mmr_base + 0x28) -#define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2))) -#define AAU_EDCR0_IDX 8 -#define AAU_EDCR1_IDX 17 -#define AAU_EDCR2_IDX 26 - -struct iop3xx_aau_desc_ctrl { - unsigned int int_en:1; - unsigned int blk1_cmd_ctrl:3; - unsigned int blk2_cmd_ctrl:3; - unsigned int blk3_cmd_ctrl:3; - unsigned int blk4_cmd_ctrl:3; - unsigned int blk5_cmd_ctrl:3; - unsigned int blk6_cmd_ctrl:3; - unsigned int blk7_cmd_ctrl:3; - unsigned int blk8_cmd_ctrl:3; - unsigned int blk_ctrl:2; - unsigned int dual_xor_en:1; - unsigned int tx_complete:1; - unsigned int zero_result_err:1; - unsigned int zero_result_en:1; - unsigned int dest_write_en:1; -}; - -struct iop3xx_aau_e_desc_ctrl { - unsigned int reserved:1; - unsigned int blk1_cmd_ctrl:3; - unsigned int blk2_cmd_ctrl:3; - unsigned int blk3_cmd_ctrl:3; - unsigned int blk4_cmd_ctrl:3; - unsigned int blk5_cmd_ctrl:3; - unsigned int blk6_cmd_ctrl:3; - unsigned int blk7_cmd_ctrl:3; - unsigned int blk8_cmd_ctrl:3; - unsigned int reserved2:7; -}; - -struct iop3xx_dma_desc_ctrl { - unsigned int pci_transaction:4; - unsigned int int_en:1; - unsigned int dac_cycle_en:1; - unsigned int mem_to_mem_en:1; - unsigned int crc_data_tx_en:1; - unsigned int crc_gen_en:1; - unsigned int crc_seed_dis:1; - unsigned int reserved:21; - unsigned int crc_tx_complete:1; -}; - -struct iop3xx_desc_dma { - u32 next_desc; - union { - u32 pci_src_addr; - u32 pci_dest_addr; - u32 src_addr; - }; - union { - u32 upper_pci_src_addr; - u32 upper_pci_dest_addr; - }; - union { - u32 local_pci_src_addr; - u32 local_pci_dest_addr; - u32 dest_addr; - }; - u32 byte_count; - union { - u32 desc_ctrl; - struct iop3xx_dma_desc_ctrl desc_ctrl_field; - }; - u32 crc_addr; -}; - -struct iop3xx_desc_aau { - u32 next_desc; - u32 src[4]; - u32 dest_addr; - u32 byte_count; - union { - u32 desc_ctrl; - struct iop3xx_aau_desc_ctrl desc_ctrl_field; - }; - union { - u32 src_addr; - u32 e_desc_ctrl; - struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; - } src_edc[31]; -}; - -struct iop3xx_aau_gfmr { - 
unsigned int gfmr1:8; - unsigned int gfmr2:8; - unsigned int gfmr3:8; - unsigned int gfmr4:8; -}; - -struct iop3xx_desc_pq_xor { - u32 next_desc; - u32 src[3]; - union { - u32 data_mult1; - struct iop3xx_aau_gfmr data_mult1_field; - }; - u32 dest_addr; - u32 byte_count; - union { - u32 desc_ctrl; - struct iop3xx_aau_desc_ctrl desc_ctrl_field; - }; - union { - u32 src_addr; - u32 e_desc_ctrl; - struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; - u32 data_multiplier; - struct iop3xx_aau_gfmr data_mult_field; - u32 reserved; - } src_edc_gfmr[19]; -}; - -struct iop3xx_desc_dual_xor { - u32 next_desc; - u32 src0_addr; - u32 src1_addr; - u32 h_src_addr; - u32 d_src_addr; - u32 h_dest_addr; - u32 byte_count; - union { - u32 desc_ctrl; - struct iop3xx_aau_desc_ctrl desc_ctrl_field; - }; - u32 d_dest_addr; -}; - -union iop3xx_desc { - struct iop3xx_desc_aau *aau; - struct iop3xx_desc_dma *dma; - struct iop3xx_desc_pq_xor *pq_xor; - struct iop3xx_desc_dual_xor *dual_xor; - void *ptr; -}; - -/* No support for p+q operations */ -static inline int -iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op) -{ - BUG(); - return 0; -} - -static inline void -iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - BUG(); -} - -static inline void -iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr) -{ - BUG(); -} - -static inline void -iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx, - dma_addr_t addr, unsigned char coef) -{ - BUG(); -} - -static inline int -iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op) -{ - BUG(); - return 0; -} - -static inline void -iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - BUG(); -} - -static inline void -iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) -{ - BUG(); -} - -#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr - -static inline void -iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx, - dma_addr_t *src) -{ - BUG(); -} - -static inline int iop_adma_get_max_xor(void) -{ - return 32; -} - -static inline int iop_adma_get_max_pq(void) -{ - BUG(); - return 0; -} - -static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) -{ - int id = chan->device->id; - - switch (id) { - case DMA0_ID: - case DMA1_ID: - return __raw_readl(DMA_DAR(chan)); - case AAU_ID: - return __raw_readl(AAU_ADAR(chan)); - default: - BUG(); - } - return 0; -} - -static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan, - u32 next_desc_addr) -{ - int id = chan->device->id; - - switch (id) { - case DMA0_ID: - case DMA1_ID: - __raw_writel(next_desc_addr, DMA_NDAR(chan)); - break; - case AAU_ID: - __raw_writel(next_desc_addr, AAU_ANDAR(chan)); - break; - } - -} - -#define IOP_ADMA_STATUS_BUSY (1 << 10) -#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024) -#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024) -#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) - -static inline int iop_chan_is_busy(struct iop_adma_chan *chan) -{ - u32 status = __raw_readl(DMA_CSR(chan)); - return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0; -} - -static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc, - int num_slots) -{ - /* num_slots will only ever be 1, 2, 4, or 8 */ - return (desc->idx & (num_slots - 1)) ? 0 : 1; -} - -/* to do: support large (i.e. 
> hw max) buffer sizes */ -static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op) -{ - *slots_per_op = 1; - return 1; -} - -/* to do: support large (i.e. > hw max) buffer sizes */ -static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op) -{ - *slots_per_op = 1; - return 1; -} - -static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, - int *slots_per_op) -{ - static const char slot_count_table[] = { - 1, 1, 1, 1, /* 01 - 04 */ - 2, 2, 2, 2, /* 05 - 08 */ - 4, 4, 4, 4, /* 09 - 12 */ - 4, 4, 4, 4, /* 13 - 16 */ - 8, 8, 8, 8, /* 17 - 20 */ - 8, 8, 8, 8, /* 21 - 24 */ - 8, 8, 8, 8, /* 25 - 28 */ - 8, 8, 8, 8, /* 29 - 32 */ - }; - *slots_per_op = slot_count_table[src_cnt - 1]; - return *slots_per_op; -} - -static inline int -iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan) -{ - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return iop_chan_memcpy_slot_count(0, slots_per_op); - case AAU_ID: - return iop3xx_aau_xor_slot_count(0, 2, slots_per_op); - default: - BUG(); - } - return 0; -} - -static inline int iop_chan_xor_slot_count(size_t len, int src_cnt, - int *slots_per_op) -{ - int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); - - if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT) - return slot_cnt; - - len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; - while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) { - len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; - slot_cnt += *slots_per_op; - } - - slot_cnt += *slots_per_op; - - return slot_cnt; -} - -/* zero sum on iop3xx is limited to 1k at a time so it requires multiple - * descriptors - */ -static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, - int *slots_per_op) -{ - int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); - - if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) - return slot_cnt; - - len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; - while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { - len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; - slot_cnt += *slots_per_op; - } - - slot_cnt += *slots_per_op; - - return slot_cnt; -} - -static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return hw_desc.dma->byte_count; - case AAU_ID: - return hw_desc.aau->byte_count; - default: - BUG(); - } - return 0; -} - -/* translate the src_idx to a descriptor word index */ -static inline int __desc_idx(int src_idx) -{ - static const int desc_idx_table[] = { 0, 0, 0, 0, - 0, 1, 2, 3, - 5, 6, 7, 8, - 9, 10, 11, 12, - 14, 15, 16, 17, - 18, 19, 20, 21, - 23, 24, 25, 26, - 27, 28, 29, 30, - }; - - return desc_idx_table[src_idx]; -} - -static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan, - int src_idx) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return hw_desc.dma->src_addr; - case AAU_ID: - break; - default: - BUG(); - } - - if (src_idx < 4) - return hw_desc.aau->src[src_idx]; - else - return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr; -} - -static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc, - int src_idx, dma_addr_t addr) -{ - if (src_idx < 4) - hw_desc->src[src_idx] = addr; - else - hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr; -} - -static inline void -iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags) -{ - 
struct iop3xx_desc_dma *hw_desc = desc->hw_desc; - union { - u32 value; - struct iop3xx_dma_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = 0; - u_desc_ctrl.field.mem_to_mem_en = 1; - u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */ - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - hw_desc->desc_ctrl = u_desc_ctrl.value; - hw_desc->upper_pci_src_addr = 0; - hw_desc->crc_addr = 0; -} - -static inline void -iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags) -{ - struct iop3xx_desc_aau *hw_desc = desc->hw_desc; - union { - u32 value; - struct iop3xx_aau_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = 0; - u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */ - u_desc_ctrl.field.dest_write_en = 1; - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - hw_desc->desc_ctrl = u_desc_ctrl.value; -} - -static inline u32 -iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, - unsigned long flags) -{ - int i, shift; - u32 edcr; - union { - u32 value; - struct iop3xx_aau_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = 0; - switch (src_cnt) { - case 25 ... 32: - u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ - edcr = 0; - shift = 1; - for (i = 24; i < src_cnt; i++) { - edcr |= (1 << shift); - shift += 3; - } - hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr; - src_cnt = 24; - fallthrough; - case 17 ... 24: - if (!u_desc_ctrl.field.blk_ctrl) { - hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; - u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ - } - edcr = 0; - shift = 1; - for (i = 16; i < src_cnt; i++) { - edcr |= (1 << shift); - shift += 3; - } - hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr; - src_cnt = 16; - fallthrough; - case 9 ... 16: - if (!u_desc_ctrl.field.blk_ctrl) - u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ - edcr = 0; - shift = 1; - for (i = 8; i < src_cnt; i++) { - edcr |= (1 << shift); - shift += 3; - } - hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr; - src_cnt = 8; - fallthrough; - case 2 ... 
8: - shift = 1; - for (i = 0; i < src_cnt; i++) { - u_desc_ctrl.value |= (1 << shift); - shift += 3; - } - - if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) - u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ - } - - u_desc_ctrl.field.dest_write_en = 1; - u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */ - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - hw_desc->desc_ctrl = u_desc_ctrl.value; - - return u_desc_ctrl.value; -} - -static inline void -iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - iop3xx_desc_init_xor(desc->hw_desc, src_cnt, flags); -} - -/* return the number of operations */ -static inline int -iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; - struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter; - union { - u32 value; - struct iop3xx_aau_desc_ctrl field; - } u_desc_ctrl; - int i, j; - - hw_desc = desc->hw_desc; - - for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0; - i += slots_per_op, j++) { - iter = iop_hw_desc_slot_idx(hw_desc, i); - u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, flags); - u_desc_ctrl.field.dest_write_en = 0; - u_desc_ctrl.field.zero_result_en = 1; - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - iter->desc_ctrl = u_desc_ctrl.value; - - /* for the subsequent descriptors preserve the store queue - * and chain them together - */ - if (i) { - prev_hw_desc = - iop_hw_desc_slot_idx(hw_desc, i - slots_per_op); - prev_hw_desc->next_desc = - (u32) (desc->async_tx.phys + (i << 5)); - } - } - - return j; -} - -static inline void -iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, - unsigned long flags) -{ - struct iop3xx_desc_aau *hw_desc = desc->hw_desc; - union { - u32 value; - struct iop3xx_aau_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = 0; - switch (src_cnt) { - case 25 ... 32: - u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ - hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; - fallthrough; - case 17 ... 24: - if (!u_desc_ctrl.field.blk_ctrl) { - hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; - u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ - } - hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0; - fallthrough; - case 9 ... 16: - if (!u_desc_ctrl.field.blk_ctrl) - u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ - hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0; - fallthrough; - case 1 ... 
8: - if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) - u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ - } - - u_desc_ctrl.field.dest_write_en = 0; - u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; - hw_desc->desc_ctrl = u_desc_ctrl.value; -} - -static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan, - u32 byte_count) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - hw_desc.dma->byte_count = byte_count; - break; - case AAU_ID: - hw_desc.aau->byte_count = byte_count; - break; - default: - BUG(); - } -} - -static inline void -iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - iop_desc_init_memcpy(desc, 1); - hw_desc.dma->byte_count = 0; - hw_desc.dma->dest_addr = 0; - hw_desc.dma->src_addr = 0; - break; - case AAU_ID: - iop_desc_init_null_xor(desc, 2, 1); - hw_desc.aau->byte_count = 0; - hw_desc.aau->dest_addr = 0; - hw_desc.aau->src[0] = 0; - hw_desc.aau->src[1] = 0; - break; - default: - BUG(); - } -} - -static inline void -iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) -{ - int slots_per_op = desc->slots_per_op; - struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; - int i = 0; - - if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { - hw_desc->byte_count = len; - } else { - do { - iter = iop_hw_desc_slot_idx(hw_desc, i); - iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; - len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; - i += slots_per_op; - } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); - - iter = iop_hw_desc_slot_idx(hw_desc, i); - iter->byte_count = len; - } -} - -static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan, - dma_addr_t addr) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - hw_desc.dma->dest_addr = addr; - break; - case AAU_ID: - hw_desc.aau->dest_addr = addr; - break; - default: - BUG(); - } -} - -static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, - dma_addr_t addr) -{ - struct iop3xx_desc_dma *hw_desc = desc->hw_desc; - hw_desc->src_addr = addr; -} - -static inline void -iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx, - dma_addr_t addr) -{ - - struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; - int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; - int i; - - for (i = 0; (slot_cnt -= slots_per_op) >= 0; - i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { - iter = iop_hw_desc_slot_idx(hw_desc, i); - iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); - } -} - -static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, - int src_idx, dma_addr_t addr) -{ - - struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; - int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; - int i; - - for (i = 0; (slot_cnt -= slots_per_op) >= 0; - i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) { - iter = iop_hw_desc_slot_idx(hw_desc, i); - iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); - } -} - -static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, - u32 next_desc_addr) -{ - /* hw_desc->next_desc is the same location for all channels */ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - 
iop_paranoia(hw_desc.dma->next_desc); - hw_desc.dma->next_desc = next_desc_addr; -} - -static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc) -{ - /* hw_desc->next_desc is the same location for all channels */ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - return hw_desc.dma->next_desc; -} - -static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc) -{ - /* hw_desc->next_desc is the same location for all channels */ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - hw_desc.dma->next_desc = 0; -} - -static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, - u32 val) -{ - struct iop3xx_desc_aau *hw_desc = desc->hw_desc; - hw_desc->src[0] = val; -} - -static inline enum sum_check_flags -iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) -{ - struct iop3xx_desc_aau *hw_desc = desc->hw_desc; - struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; - - iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err << SUM_CHECK_P; -} - -static inline void iop_chan_append(struct iop_adma_chan *chan) -{ - u32 dma_chan_ctrl; - - dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); - dma_chan_ctrl |= 0x2; - __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); -} - -static inline u32 iop_chan_get_status(struct iop_adma_chan *chan) -{ - return __raw_readl(DMA_CSR(chan)); -} - -static inline void iop_chan_disable(struct iop_adma_chan *chan) -{ - u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); - dma_chan_ctrl &= ~1; - __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); -} - -static inline void iop_chan_enable(struct iop_adma_chan *chan) -{ - u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); - - dma_chan_ctrl |= 1; - __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); -} - -static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan) -{ - u32 status = __raw_readl(DMA_CSR(chan)); - status &= (1 << 9); - __raw_writel(status, DMA_CSR(chan)); -} - -static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan) -{ - u32 status = __raw_readl(DMA_CSR(chan)); - status &= (1 << 8); - __raw_writel(status, DMA_CSR(chan)); -} - -static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan) -{ - u32 status = __raw_readl(DMA_CSR(chan)); - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1); - break; - case AAU_ID: - status &= (1 << 5); - break; - default: - BUG(); - } - - __raw_writel(status, DMA_CSR(chan)); -} - -static inline int -iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan) -{ - return 0; -} - -static inline int -iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan) -{ - return 0; -} - -static inline int -iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan) -{ - return 0; -} - -static inline int -iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan) -{ - return test_bit(5, &status); -} - -static inline int -iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan) -{ - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return test_bit(2, &status); - default: - return 0; - } -} - -static inline int -iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan) -{ - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return test_bit(3, &status); - default: - return 0; - } -} - -static inline int -iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan) -{ - switch 
(chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return test_bit(1, &status); - default: - return 0; - } -} -#endif /* _ADMA_H */ -- GitLab From b203c67ebe752c8f2a2babf5e58d244c82680922 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Sat, 22 Oct 2022 01:43:40 +0000 Subject: [PATCH 042/694] csky: add arch support current_stack_pointer To follow the existing per-arch conventions, using "current_stack_pointer" to set sp. This will let it be used in non-arch places(like HARDENED_USERCOPY). Refer to the implementation of riscv commit fdecfea09328 ("riscv: Rename "sp_in_global" to "current_stack_pointer""). Link: https://lore.kernel.org/lkml/20220224060411.1855683-1-keescook@chromium.org/ Signed-off-by: Tong Tiangen Signed-off-by: Guo Ren --- arch/csky/Kconfig | 1 + arch/csky/include/asm/processor.h | 2 ++ arch/csky/kernel/stacktrace.c | 6 ++---- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index adee6ab36862e..2236b5c0c2136 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -9,6 +9,7 @@ config CSKY select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 63ad71fab30d7..ea75d72dea869 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -84,4 +84,6 @@ unsigned long __get_wchan(struct task_struct *p); #define cpu_relax() barrier() +register unsigned long current_stack_pointer __asm__("sp"); + #endif /* __ASM_CSKY_PROCESSOR_H */ diff --git a/arch/csky/kernel/stacktrace.c b/arch/csky/kernel/stacktrace.c index 9f78f5d215117..27ecd63e321bb 100644 --- a/arch/csky/kernel/stacktrace.c +++ b/arch/csky/kernel/stacktrace.c @@ -23,10 +23,9 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp __asm__ ("sp"); const register unsigned long current_fp __asm__ ("r8"); fp = current_fp; - sp = current_sp; + sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ @@ -68,8 +67,7 @@ static void notrace walk_stackframe(struct task_struct *task, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp __asm__ ("sp"); - sp = current_sp; + sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ -- GitLab From ce0ba954805e0783ceb7304d4fb357a02038e231 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 7 Oct 2022 21:16:48 +0100 Subject: [PATCH 043/694] csky: Kconfig: Fix spelling mistake "Meory" -> "Memory" There is a spelling mistake in a Kconfig option description. Fix it. 
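Going back to [PATCH 042/694] (csky current_stack_pointer) above: a minimal sketch of how generic, non-arch code (in the spirit of the HARDENED_USERCOPY stack checks) could consume the alias once ARCH_HAS_CURRENT_STACK_POINTER is selected. The helper name ptr_on_current_stack() and the exact bounds check are illustrative assumptions, not taken from the patch, and the sketch ignores stack-grows-up configurations:

    #include <linux/sched.h>
    #include <linux/sched/task_stack.h>
    #include <linux/thread_info.h>

    /* Illustrative only: does ptr fall inside the live part of the current
     * task's stack?  Uses the generic current_stack_pointer alias instead of
     * declaring an arch-specific register variable locally.
     */
    static bool ptr_on_current_stack(const void *ptr)
    {
            unsigned long sp = current_stack_pointer;
            unsigned long stack = (unsigned long)task_stack_page(current);

            /* stack grows down: the live region is [sp, stack + THREAD_SIZE) */
            return (unsigned long)ptr >= sp &&
                   (unsigned long)ptr < stack + THREAD_SIZE;
    }

The point of the shared alias is that code like this no longer needs a per-architecture asm("sp") declaration; each architecture that selects ARCH_HAS_CURRENT_STACK_POINTER provides the register variable once, as the csky patch does in asm/processor.h.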
Signed-off-by: Colin Ian King Signed-off-by: Guo Ren --- arch/csky/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 2236b5c0c2136..e0ecd1cc81a94 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -270,7 +270,7 @@ menuconfig HAVE_TCM bool "Tightly-Coupled/Sram Memory" depends on !COMPILE_TEST help - The implementation are not only used by TCM (Tightly-Coupled Meory) + The implementation are not only used by TCM (Tightly-Coupled Memory) but also used by sram on SOC bus. It follow existed linux tcm software interface, so that old tcm application codes could be re-used directly. -- GitLab From 894c792e3e24c2c15d8aac15aa89ec144468e1b0 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 13 Oct 2022 14:46:36 -0700 Subject: [PATCH 044/694] MAINTAINERS: git://github -> https://github.com for terrelln Github deprecated the git:// links about a year ago, so let's move to the https:// URLs instead. Reported-by: Conor Dooley Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/ Signed-off-by: Palmer Dabbelt Signed-off-by: Nick Terrell --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e04d944005ba8..4a2b7a9325dc2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22800,7 +22800,7 @@ ZSTD M: Nick Terrell S: Maintained B: https://github.com/facebook/zstd/issues -T: git git://github.com/terrelln/linux.git +T: git https://github.com/terrelln/linux.git F: include/linux/zstd* F: lib/zstd/ F: lib/decompress_unzstd.c -- GitLab From 7486f5c6e7b197400678f1bb603ac9e4027fb830 Mon Sep 17 00:00:00 2001 From: Jilin Yuan Date: Fri, 2 Sep 2022 09:32:12 +0800 Subject: [PATCH 045/694] lib: zstd: fix repeated words in comments Delete the redundant word 'the'. Signed-off-by: Jilin Yuan Signed-off-by: Nick Terrell --- lib/zstd/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c index a4e916008b3a7..73fff4c60149d 100644 --- a/lib/zstd/compress/zstd_compress.c +++ b/lib/zstd/compress/zstd_compress.c @@ -4441,7 +4441,7 @@ static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { size_t offsetBound; U32 windowSize = 1 << windowLog; - /* posInSrc represents the amount of data the the decoder would decode up to this point. + /* posInSrc represents the amount of data the decoder would decode up to this point. * As long as the amount of data decoded is less than or equal to window size, offsets may be * larger than the total length of output decoded in order to reference the dict, even larger than * window size. After output surpasses windowSize, we're limited to windowSize offsets again. -- GitLab From 19d7df98472851e1d2d11e00c177988d0f49683d Mon Sep 17 00:00:00 2001 From: Xin Gao Date: Mon, 17 Oct 2022 15:18:59 -0700 Subject: [PATCH 046/694] lib: zstd: Fix comment typo The double `when' is duplicated in line 999, remove one. 
Signed-off-by: Xin Gao Signed-off-by: Nick Terrell --- lib/zstd/decompress/zstd_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c index b4d81d84479ac..6928e85f9d193 100644 --- a/lib/zstd/decompress/zstd_decompress.c +++ b/lib/zstd/decompress/zstd_decompress.c @@ -996,7 +996,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } /* - * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, * we allow taking a partial block as the input. Currently only raw uncompressed blocks can * be streamed. * -- GitLab From 4782c725c1538aa9ef894ae4a3938db40be7f02c Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Oct 2022 14:47:04 -0700 Subject: [PATCH 047/694] zstd: Move zstd-common module exports to zstd_common_module.c The zstd codebase is imported from the upstream zstd repo, and is over-written on every update. Upstream keeps the kernel specific code separate from the main library. So the module definition is moved into the zstd_common_module.c file. This matches the pattern followed by the zstd-compress and zstd-decompress files. I've done build and boot testing on x86-64, i386, and aarch64. I've verified that zstd built both as modules and built-in build and boot. Signed-off-by: Nick Terrell --- lib/zstd/Makefile | 1 + lib/zstd/common/entropy_common.c | 4 ---- lib/zstd/common/zstd_common.c | 10 ---------- lib/zstd/zstd_common_module.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 lib/zstd/zstd_common_module.c diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile index 440bd0007ae20..20f08c644b71a 100644 --- a/lib/zstd/Makefile +++ b/lib/zstd/Makefile @@ -35,6 +35,7 @@ zstd_decompress-y := \ decompress/zstd_decompress_block.o \ zstd_common-y := \ + zstd_common_module.o \ common/debug.o \ common/entropy_common.o \ common/error_private.o \ diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c index a311808c0d566..6353249de614c 100644 --- a/lib/zstd/common/entropy_common.c +++ b/lib/zstd/common/entropy_common.c @@ -15,7 +15,6 @@ /* ************************************* * Dependencies ***************************************/ -#include #include "mem.h" #include "error_private.h" /* ERR_*, ERROR */ #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ @@ -240,7 +239,6 @@ size_t FSE_readNCount( { return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); } -EXPORT_SYMBOL_GPL(FSE_readNCount); /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). 
@@ -256,7 +254,6 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); } -EXPORT_SYMBOL_GPL(HUF_readStats); FORCE_INLINE_TEMPLATE size_t HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, @@ -357,4 +354,3 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, (void)bmi2; return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); } -EXPORT_SYMBOL_GPL(HUF_readStats_wksp); diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c index 0f1f63be25d9f..3d7e35b309b5d 100644 --- a/lib/zstd/common/zstd_common.c +++ b/lib/zstd/common/zstd_common.c @@ -13,7 +13,6 @@ /*-************************************* * Dependencies ***************************************/ -#include #define ZSTD_DEPS_NEED_MALLOC #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ #include "error_private.h" @@ -36,17 +35,14 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } * tells if a return value is an error code * symbol is required for external callers */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } -EXPORT_SYMBOL_GPL(ZSTD_isError); /*! ZSTD_getErrorName() : * provides error code string from function result (useful for debugging) */ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } -EXPORT_SYMBOL_GPL(ZSTD_getErrorName); /*! ZSTD_getError() : * convert a `size_t` function result into a proper ZSTD_errorCode enum */ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } -EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); /*! ZSTD_getErrorString() : * provides error code string from enum */ @@ -63,7 +59,6 @@ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) return customMem.customAlloc(customMem.opaque, size); return ZSTD_malloc(size); } -EXPORT_SYMBOL_GPL(ZSTD_customMalloc); void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) { @@ -76,7 +71,6 @@ void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) } return ZSTD_calloc(1, size); } -EXPORT_SYMBOL_GPL(ZSTD_customCalloc); void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) { @@ -87,7 +81,3 @@ void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) ZSTD_free(ptr); } } -EXPORT_SYMBOL_GPL(ZSTD_customFree); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Zstd Common"); diff --git a/lib/zstd/zstd_common_module.c b/lib/zstd/zstd_common_module.c new file mode 100644 index 0000000000000..22686e367e6f0 --- /dev/null +++ b/lib/zstd/zstd_common_module.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include <linux/module.h> + +#include "common/huf.h" +#include "common/fse.h" +#include "common/zstd_internal.h" + +// Export symbols shared by compress and decompress into a common module + +#undef ZSTD_isError /* defined within zstd_internal.h */ +EXPORT_SYMBOL_GPL(FSE_readNCount); +EXPORT_SYMBOL_GPL(HUF_readStats); +EXPORT_SYMBOL_GPL(HUF_readStats_wksp); +EXPORT_SYMBOL_GPL(ZSTD_isError); +EXPORT_SYMBOL_GPL(ZSTD_getErrorName); +EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); +EXPORT_SYMBOL_GPL(ZSTD_customMalloc); +EXPORT_SYMBOL_GPL(ZSTD_customCalloc); +EXPORT_SYMBOL_GPL(ZSTD_customFree); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Common"); -- GitLab From 2aa14b1ab2c41a4fe41efae80d58bb77da91f19f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 17 Oct 2022 13:32:37 -0700 Subject: [PATCH 048/694] zstd: import upstream v1.5.2 Updates the kernel's zstd library to v1.5.2, the latest zstd release. The upstream tag it is updated to is `v1.5.2-kernel`, which contains several cherry-picked commits on top of the v1.5.2 release which are required for the kernel update. I will create this tag once the PR is ready to merge, until then reference the temporary upstream branch `v1.5.2-kernel-cherrypicks`. I plan to submit this patch as part of the v6.2 merge window. I've done basic build testing & testing on x86-64, i386, and aarch64. I'm merging these patches into my `zstd-next` branch, which is pulled into `linux-next` for further testing. I've benchmarked BtrFS with zstd compression on a x86-64 machine, and saw these results. Decompression speed is a small win across the board. The lower compression levels 1-4 see both compression speed and compression ratio wins. The higher compression levels see a small compression speed loss and about neutral ratio. I expect the lower compression levels to be used much more heavily than the high compression levels, so this should be a net win.
Level CTime DTime Ratio 1 -2.95% -1.1% -0.7% 3 -3.5% -1.2% -0.5% 5 +3.7% -1.0% +0.0% 7 +3.2% -0.9% +0.0% 9 -4.3% -0.8% +0.1% Signed-off-by: Nick Terrell --- include/linux/zstd_lib.h | 475 ++-- lib/zstd/common/bitstream.h | 9 + lib/zstd/common/compiler.h | 67 +- lib/zstd/common/entropy_common.c | 7 +- lib/zstd/common/error_private.h | 81 +- lib/zstd/common/fse.h | 3 +- lib/zstd/common/fse_decompress.c | 2 +- lib/zstd/common/huf.h | 46 +- lib/zstd/common/mem.h | 2 + lib/zstd/common/portability_macros.h | 93 + lib/zstd/common/zstd_internal.h | 175 +- lib/zstd/compress/clevels.h | 132 ++ lib/zstd/compress/fse_compress.c | 83 +- lib/zstd/compress/huf_compress.c | 644 +++++- lib/zstd/compress/zstd_compress.c | 1998 +++++++++++++---- lib/zstd/compress/zstd_compress_internal.h | 375 +++- lib/zstd/compress/zstd_compress_literals.c | 9 +- lib/zstd/compress/zstd_compress_literals.h | 4 +- lib/zstd/compress/zstd_compress_sequences.c | 31 +- lib/zstd/compress/zstd_compress_superblock.c | 295 +-- lib/zstd/compress/zstd_cwksp.h | 225 +- lib/zstd/compress/zstd_double_fast.c | 413 +++- lib/zstd/compress/zstd_fast.c | 441 ++-- lib/zstd/compress/zstd_lazy.c | 1352 ++++++++--- lib/zstd/compress/zstd_lazy.h | 38 + lib/zstd/compress/zstd_ldm.c | 76 +- lib/zstd/compress/zstd_ldm.h | 1 + lib/zstd/compress/zstd_ldm_geartab.h | 5 +- lib/zstd/compress/zstd_opt.c | 402 ++-- lib/zstd/decompress/huf_decompress.c | 912 ++++++-- lib/zstd/decompress/zstd_decompress.c | 78 +- lib/zstd/decompress/zstd_decompress_block.c | 1022 +++++++-- lib/zstd/decompress/zstd_decompress_block.h | 10 +- .../decompress/zstd_decompress_internal.h | 38 +- lib/zstd/decompress_sources.h | 6 + lib/zstd/zstd_compress_module.c | 6 +- 36 files changed, 6951 insertions(+), 2605 deletions(-) create mode 100644 lib/zstd/common/portability_macros.h create mode 100644 lib/zstd/compress/clevels.h diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h index 6b91758b61af9..79d55465d5c1d 100644 --- a/include/linux/zstd_lib.h +++ b/include/linux/zstd_lib.h @@ -17,8 +17,16 @@ /* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#define ZSTDLIB_VISIBILITY -#define ZSTDLIB_API ZSTDLIB_VISIBILITY +#ifndef ZSTDLIB_VISIBLE +# if (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default"))) +# define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) +# else +# define ZSTDLIB_VISIBLE +# define ZSTDLIB_HIDDEN +# endif +#endif +#define ZSTDLIB_API ZSTDLIB_VISIBLE /* ***************************************************************************** @@ -56,8 +64,8 @@ /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 10 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 2 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) /*! 
ZSTD_versionNumber() : @@ -94,7 +102,6 @@ ZSTDLIB_API const char* ZSTD_versionString(void); #define ZSTD_BLOCKSIZE_MAX (1<= first frame size * @return : the compressed size of the first frame starting at `src`, @@ -165,8 +172,9 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ /* ************************************* @@ -219,9 +227,9 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, const void* src, size_t srcSize); -/* ************************************* -* Advanced compression API -***************************************/ +/* ******************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ /* API design : * Parameters are pushed one by one into an existing context, @@ -232,7 +240,7 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, * * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * - * This API supercedes all other "advanced" API entry points in the experimental section. + * This API supersedes all other "advanced" API entry points in the experimental section. * In the future, we expect to remove from experimental API entry points which are redundant with this API. */ @@ -251,7 +259,6 @@ typedef enum { ZSTD_fast=1, Only the order (from fast to strong) is guaranteed */ } ZSTD_strategy; - typedef enum { /* compression parameters @@ -317,7 +324,6 @@ typedef enum { * The higher the value of selected strategy, the more complex it is, * resulting in stronger and slower compression. * Special: value 0 means "use default strategy". */ - /* LDM mode parameters */ ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. * This parameter is designed to improve compression ratio @@ -374,7 +380,7 @@ typedef enum { ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. * The minimum size is automatically and transparently enforced. */ ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. @@ -404,6 +410,8 @@ typedef enum { * ZSTD_c_stableOutBuffer * ZSTD_c_blockDelimiters * ZSTD_c_validateSequences + * ZSTD_c_useBlockSplitter + * ZSTD_c_useRowMatchFinder * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. 
* note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -419,7 +427,10 @@ typedef enum { ZSTD_c_experimentalParam9=1006, ZSTD_c_experimentalParam10=1007, ZSTD_c_experimentalParam11=1008, - ZSTD_c_experimentalParam12=1009 + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 } ZSTD_cParameter; typedef struct { @@ -504,9 +515,9 @@ ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, const void* src, size_t srcSize); -/* ************************************* -* Advanced decompression API -***************************************/ +/* ********************************************* +* Advanced decompression API (Requires v1.4.0+) +************************************************/ /* The advanced API pushes parameters one by one into an existing DCtx context. * Parameters are sticky, and remain valid for all following frames @@ -668,7 +679,7 @@ typedef enum { : note : multithreaded compression will block to flush as much output as possible. */ } ZSTD_EndDirective; -/*! ZSTD_compressStream2() : +/*! ZSTD_compressStream2() : Requires v1.4.0+ * Behaves about the same as ZSTD_compressStream, with additional control on end directive. * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) @@ -714,11 +725,11 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output /* ***************************************************************************** - * This following is a legacy streaming API. + * This following is a legacy streaming API, available since v1.0+ . * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). * It is redundant, but remains fully supported. - * Advanced parameters and dictionary compression can only be used through the - * new API. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. ******************************************************************************/ /*! @@ -796,7 +807,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output /*! ZSTD_compress_usingDict() : * Compression at an explicit compression level using a Dictionary. * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dictBuilder/zdict.h). + * or a buffer with specified information (see zdict.h). * Note : This function loads the dictionary, resulting in significant startup delay. * It's intended for a dictionary used only once. * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ @@ -879,19 +890,25 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, * Dictionary helper functions *******************************/ -/*! ZSTD_getDictID_fromDict() : +/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+ * Provides the dictID stored within dictionary. * if @return == 0, the dictionary is not conformant with Zstandard specification. * It can still be loaded, but as a content-only dictionary. */ ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); -/*! ZSTD_getDictID_fromDDict() : +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. 
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+ * Provides the dictID of the dictionary loaded into `ddict`. * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); -/*! ZSTD_getDictID_fromFrame() : +/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+ * Provides the dictID required to decompressed the frame stored within `src`. * If @return == 0, the dictID could not be decoded. * This could for one of the following reasons : @@ -905,16 +922,16 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); /* ***************************************************************************** - * Advanced dictionary and prefix API + * Advanced dictionary and prefix API (Requires v1.4.0+) * * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and * only reset with the context is reset with ZSTD_reset_parameters or * ZSTD_reset_session_and_parameters. Prefixes are single-use. ******************************************************************************/ -/*! ZSTD_CCtx_loadDictionary() : +/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+ * Create an internal CDict from `dict` buffer. * Decompression will have to use same dictionary. * @result : 0, or an error code (which can be tested with ZSTD_isError()). @@ -933,7 +950,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * to precisely select how dictionary content must be interpreted. */ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -/*! ZSTD_CCtx_refCDict() : +/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ * Reference a prepared dictionary, to be used for all next compressed frames. * Note that compression parameters are enforced from within CDict, * and supersede any compression parameter previously set within CCtx. @@ -947,7 +964,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); -/*! ZSTD_CCtx_refPrefix() : +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ * Reference a prefix (single-usage dictionary) for next compressed frame. * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). * Decompression will need same prefix to properly regenerate data. @@ -968,7 +985,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); -/*! ZSTD_DCtx_loadDictionary() : +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ * Create an internal DDict from dict buffer, * to be used to decompress next frames. * The dictionary remains valid for all future frames, until explicitly invalidated. 
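/*
 * Aside, not part of the imported header: a minimal sketch of the dictionary
 * flow documented above, using only declarations from this file
 * (ZSTD_CCtx_loadDictionary(), ZSTD_compress2(), ZSTD_DCtx_loadDictionary(),
 * ZSTD_decompressDCtx()).  Kernel callers normally go through the
 * include/linux/zstd.h wrappers instead; buffer management and error
 * handling are reduced to the bare minimum here.
 */
#include <linux/zstd_lib.h>

static size_t roundtrip_with_dict(void *dst, size_t dstCapacity,
                                  void *rt, size_t rtCapacity,
                                  const void *src, size_t srcSize,
                                  const void *dict, size_t dictSize)
{
        ZSTD_CCtx *cctx = ZSTD_createCCtx();
        ZSTD_DCtx *dctx = ZSTD_createDCtx();
        size_t cSize, dSize = (size_t)-1;  /* reads as an error to ZSTD_isError() */

        if (cctx && dctx
            && !ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3))
            && !ZSTD_isError(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize))) {
                cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
                if (!ZSTD_isError(cSize)
                    && !ZSTD_isError(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize)))
                        dSize = ZSTD_decompressDCtx(dctx, rt, rtCapacity, dst, cSize);
        }
        ZSTD_freeCCtx(cctx);  /* both free functions accept NULL */
        ZSTD_freeDCtx(dctx);
        return dSize;         /* decompressed size, or an error code */
}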
@@ -985,7 +1002,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, */ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -/*! ZSTD_DCtx_refDDict() : +/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+ * Reference a prepared dictionary, to be used to decompress next frames. * The dictionary remains active for decompression of future frames using same DCtx. * @@ -1003,7 +1020,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s */ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -/*! ZSTD_DCtx_refPrefix() : +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ * Reference a prefix (single-usage dictionary) to decompress next frame. * This is the reverse operation of ZSTD_CCtx_refPrefix(), * and must use the same prefix as the one used during compression. @@ -1024,7 +1041,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, /* === Memory management === */ -/*! ZSTD_sizeof_*() : +/*! ZSTD_sizeof_*() : Requires v1.4.0+ * These functions give the _current_ memory usage of selected object. * Note that object memory usage can evolve (increase or decrease) over time. */ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); @@ -1049,6 +1066,29 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY +/* This can be overridden externally to hide static symbols. */ +#ifndef ZSTDLIB_STATIC_API +#define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE +#endif + +/* Deprecation warnings : + * Should these warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. + */ +#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS +# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API /* disable deprecation warnings */ +#else +# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) +# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message))) +# elif (__GNUC__ >= 3) +# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated)) +# else +# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") +# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API +# endif +#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ + /* ************************************************************************************** * experimental API (static linking only) **************************************************************************************** @@ -1111,9 +1151,6 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_SRCSIZEHINT_MIN 0 #define ZSTD_SRCSIZEHINT_MAX INT_MAX -/* internal */ -#define ZSTD_HASHLOG3_MAX 17 - /* --- Advanced types --- */ @@ -1255,6 +1292,15 @@ typedef enum { ZSTD_lcm_uncompressed = 2 /*< Always emit uncompressed literals. */ } ZSTD_literalCompressionMode_e; +typedef enum { + /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final + * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable + * or ZSTD_ps_disable allow for a force enable/disable the feature. 
+ */ + ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */ + ZSTD_ps_enable = 1, /* Force-enable the feature */ + ZSTD_ps_disable = 2 /* Do not use the feature */ +} ZSTD_paramSwitch_e; /* ************************************* * Frame size functions @@ -1281,7 +1327,7 @@ typedef enum { * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to * read each contained frame header. This is fast as most of the data is skipped, * however it does mean that all frame data must be present and valid. */ -ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); +ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); /*! ZSTD_decompressBound() : * `src` should point to the start of a series of ZSTD encoded and/or skippable frames @@ -1296,13 +1342,13 @@ ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: * upper-bound = # blocks * min(128 KB, Window_Size) */ -ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); +ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); /*! ZSTD_frameHeaderSize() : * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. * @return : size of the Frame Header, * or an error code (if srcSize is too small) */ -ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); typedef enum { ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ @@ -1325,7 +1371,7 @@ typedef enum { * @return : number of sequences generated */ -ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, +ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t outSeqsSize, const void* src, size_t srcSize); /*! ZSTD_mergeBlockDelimiters() : @@ -1339,7 +1385,7 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters * @return : number of sequences left after merging */ -ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); +ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); /*! ZSTD_compressSequences() : * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. @@ -1369,7 +1415,7 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se * and cannot emit an RLE block that disagrees with the repcode history * @return : final compressed size or a ZSTD error. */ -ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, +ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize); @@ -1387,9 +1433,29 @@ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size * * @return : number of bytes written or a ZSTD error. */ -ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, +ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned magicVariant); +/*! 
ZSTD_readSkippableFrame() : + * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, + * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested + * in the magicVariant. + * + * Returns an error if destination buffer is not large enough, or if the frame is not skippable. + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, + const void* src, size_t srcSize); + +/*! ZSTD_isSkippableFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. + */ +ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); + + /* ************************************* * Memory management @@ -1418,10 +1484,10 @@ ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, * Note 2 : only single-threaded compression is supported. * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. */ -ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); /*! ZSTD_estimateCStreamSize() : * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. @@ -1436,20 +1502,20 @@ ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), * an internal ?Dict will be created, which additional size is not estimated here. * In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); /*! ZSTD_estimate?DictSize() : * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. 
*/ -ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); -ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); /*! ZSTD_initStatic*() : * Initialize an object using a pre-allocated fixed-size buffer. @@ -1472,20 +1538,20 @@ ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e * Limitation 2 : static cctx currently not compatible with multi-threading. * Limitation 3 : static dctx is incompatible with legacy support. */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticCCtx() */ +ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticCCtx() */ -ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticDCtx() */ +ZSTDLIB_STATIC_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticDCtx() */ -ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( +ZSTDLIB_STATIC_API const ZSTD_CDict* ZSTD_initStaticCDict( void* workspace, size_t workspaceSize, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams); -ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( +ZSTDLIB_STATIC_API const ZSTD_DDict* ZSTD_initStaticDDict( void* workspace, size_t workspaceSize, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, @@ -1504,44 +1570,44 @@ static __attribute__((__unused__)) ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /*< this constant defers to stdlib's functions */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_STATIC_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, +ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams, ZSTD_customMem 
customMem); -/* ! Thread pool : - * These prototypes make it possible to share a thread pool among multiple compression contexts. - * This can limit resources for applications with multiple threads where each one uses - * a threaded compression mode (via ZSTD_c_nbWorkers parameter). - * ZSTD_createThreadPool creates a new thread pool with a given number of threads. - * Note that the lifetime of such pool must exist while being used. - * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value - * to use an internal thread pool). - * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. +/*! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. */ typedef struct POOL_ctx_s ZSTD_threadPool; -ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); -ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ -ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); +ZSTDLIB_STATIC_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_STATIC_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); /* * This API is temporary and is expected to change or disappear in the future! */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( +ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced2( const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, const ZSTD_CCtx_params* cctxParams, ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( +ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_advanced( const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, @@ -1558,28 +1624,22 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( * As a consequence, `dictBuffer` **must** outlive CDict, * and its content must remain unmodified throughout the lifetime of CDict. * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); - -/*! ZSTD_getDictID_fromCDict() : - * Provides the dictID of the dictionary loaded into `cdict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); +ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); /*! ZSTD_getCParams() : * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. 
* `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); /*! ZSTD_getParams() : * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +ZSTDLIB_STATIC_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); /*! ZSTD_checkCParams() : * Ensure param values remain within authorized range. * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); +ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); /*! ZSTD_adjustCParams() : * optimize params for a given `srcSize` and `dictSize`. @@ -1587,23 +1647,25 @@ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); * `dictSize` must be `0` when there is no dictionary. * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. * This function never fails (wide contract) */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); /*! ZSTD_compress_advanced() : * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, ZSTD_parameters params); /*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now REDUNDANT. + * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning in some future version */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict, @@ -1613,18 +1675,18 @@ ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, /*! ZSTD_CCtx_loadDictionary_byReference() : * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. 
* It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); /*! ZSTD_CCtx_loadDictionary_advanced() : * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over * how to load the dictionary (by copy ? by reference ?) * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); /*! ZSTD_CCtx_refPrefix_advanced() : * Same as ZSTD_CCtx_refPrefix(), but gives finer control over * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); /* === experimental parameters === */ /* these parameters can be used with ZSTD_setParameter() @@ -1663,9 +1725,15 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * See the comments on that enum for an explanation of the feature. */ #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 -/* Controls how the literals are compressed (default is auto). - * The value must be of type ZSTD_literalCompressionMode_e. - * See ZSTD_literalCompressionMode_t enum definition for details. +/* Controlled with ZSTD_paramSwitch_e enum. + * Default is ZSTD_ps_auto. + * Set to ZSTD_ps_disable to never compress literals. + * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals + * may still be emitted if huffman is not beneficial to use.) + * + * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use + * literals compression based on the compression parameters - specifically, + * negative compression levels do not use literal compression. */ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 @@ -1728,7 +1796,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * * Note that this means that the CDict tables can no longer be copied into the * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be - * useable. The dictionary can only be attached or reloaded. + * usable. The dictionary can only be attached or reloaded. * * In general, you should expect compression to be faster--sometimes very much * so--and CDict creation to be slightly slower. Eventually, we will probably @@ -1817,12 +1885,55 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre */ #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 +/* ZSTD_c_useBlockSplitter + * Controlled with ZSTD_paramSwitch_e enum. + * Default is ZSTD_ps_auto. + * Set to ZSTD_ps_disable to never use block splitter. + * Set to ZSTD_ps_enable to always use block splitter. 
+ * + * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use + * block splitting based on the compression parameters. + */ +#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13 + +/* ZSTD_c_useRowMatchFinder + * Controlled with ZSTD_paramSwitch_e enum. + * Default is ZSTD_ps_auto. + * Set to ZSTD_ps_disable to never use row-based matchfinder. + * Set to ZSTD_ps_enable to force usage of row-based matchfinder. + * + * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use + * the row-based matchfinder based on support for SIMD instructions and the window log. + * Note that this only pertains to compression strategies: greedy, lazy, and lazy2 + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 + +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. * @return : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); /*! ZSTD_CCtx_params : @@ -1842,27 +1953,27 @@ ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() * for static allocation of CCtx for single-threaded compression. */ -ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ +ZSTDLIB_STATIC_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_STATIC_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ /*! ZSTD_CCtxParams_reset() : * Reset params to default values. */ -ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); /*! ZSTD_CCtxParams_init() : * Initializes the compression parameters of cctxParams according to * compression level. All other parameters are reset to their default values. */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); /*! 
ZSTD_CCtxParams_init_advanced() : * Initializes the compression and frame parameters of cctxParams according to * params. All other parameters are reset to their default values. */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); -/*! ZSTD_CCtxParams_setParameter() : +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ * Similar to ZSTD_CCtx_setParameter. * Set one compression parameter, selected by enum ZSTD_cParameter. * Parameters must be applied to a ZSTD_CCtx using @@ -1870,14 +1981,14 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, Z * @result : a code representing success or failure (which can be tested with * ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); /*! ZSTD_CCtxParams_getParameter() : * Similar to ZSTD_CCtx_getParameter. * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); /*! ZSTD_CCtx_setParametersUsingCCtxParams() : * Apply a set of ZSTD_CCtx_params to the compression context. @@ -1886,7 +1997,7 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, * if nbWorkers>=1, new parameters will be picked up at next job, * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). */ -ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); /*! ZSTD_compressStream2_simpleArgs() : @@ -1895,7 +2006,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( * This variant might be helpful for binders from dynamic languages * which have troubles handling structures containing memory pointers. */ -ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( +ZSTDLIB_STATIC_API size_t ZSTD_compressStream2_simpleArgs ( ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, size_t* dstPos, const void* src, size_t srcSize, size_t* srcPos, @@ -1911,33 +2022,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); +ZSTDLIB_STATIC_API unsigned ZSTD_isFrame(const void* buffer, size_t size); /*! ZSTD_createDDict_byReference() : * Create a digested dictionary, ready to start decompression operation without startup delay. * Dictionary content is referenced, and therefore stays in dictBuffer. 
* It is important that dictBuffer outlives DDict, * it must remain read accessible throughout the lifetime of DDict */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); +ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); /*! ZSTD_DCtx_loadDictionary_byReference() : * Same as ZSTD_DCtx_loadDictionary(), * but references `dict` content instead of copying it into `dctx`. * This saves memory if `dict` remains around., * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /*! ZSTD_DCtx_loadDictionary_advanced() : * Same as ZSTD_DCtx_loadDictionary(), * but gives direct control over * how to load the dictionary (by copy ? by reference ?) * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); /*! ZSTD_DCtx_refPrefix_advanced() : * Same as ZSTD_DCtx_refPrefix(), but gives finer control over * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); /*! ZSTD_DCtx_setMaxWindowSize() : * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. @@ -1946,14 +2057,14 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* pre * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); /*! ZSTD_DCtx_getParameter() : * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, * and store it into int* value. * @return : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); /* ZSTD_d_format * experimental parameter, @@ -2028,11 +2139,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param /*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). * Instruct the decoder context about what kind of data to decode next. * This instruction is mandatory to decode data without a fully-formed header, * such ZSTD_f_zstd1_magicless for example. * @return : 0, or an error code (which can be tested using ZSTD_isError()). 
*/ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /*! ZSTD_decompressStream_simpleArgs() : * Same as ZSTD_decompressStream(), @@ -2040,7 +2153,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); * This can be helpful for binders from dynamic languages * which have troubles handling structures containing memory pointers. */ -ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( +ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs ( ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, size_t* dstPos, const void* src, size_t srcSize, size_t* srcPos); @@ -2056,7 +2169,7 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( /*===== Advanced Streaming compression functions =====*/ /*! ZSTD_initCStream_srcSize() : - * This function is deprecated, and equivalent to: + * This function is DEPRECATED, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); @@ -2065,15 +2178,15 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( * pledgedSrcSize must be correct. If it is not known at init time, use * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, * "0" also disables frame content size field. It may be enabled in the future. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /*! ZSTD_initCStream_usingDict() : - * This function is deprecated, and is equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); @@ -2082,15 +2195,15 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, * dict == NULL or dictSize < 8, in which case no dict is used. * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /*! ZSTD_initCStream_advanced() : - * This function is deprecated, and is approximately equivalent to: + * This function is DEPRECATED, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * // Pseudocode: Set each zstd parameter and leave the rest as-is. * for ((param, value) : params) { @@ -2102,23 +2215,24 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. * pledgedSrcSize must be correct. * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. 
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_advanced(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*! ZSTD_initCStream_usingCDict() : - * This function is deprecated, and equivalent to: + * This function is DEPRECATED, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, cdict); * * note : cdict will just be referenced, and must outlive compression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /*! ZSTD_initCStream_usingCDict_advanced() : * This function is DEPRECATED, and is approximately equivalent to: @@ -2133,18 +2247,21 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDi * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. * pledgedSrcSize must be correct. If srcSize is not known at init time, use * value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /*! ZSTD_resetCStream() : - * This function is deprecated, and is equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. * * start a new frame, using same parameters from previous frame. * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. @@ -2154,9 +2271,10 @@ ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. * @return : 0, or an error code (which can be tested using ZSTD_isError()) - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * This prototype will generate compilation warnings. 
*/ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); typedef struct { @@ -2174,7 +2292,7 @@ typedef struct { * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. * Aggregates progression inside active worker threads. */ -ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); +ZSTDLIB_STATIC_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); /*! ZSTD_toFlushNow() : * Tell how many bytes are ready to be flushed immediately. @@ -2189,7 +2307,7 @@ ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx * therefore flush speed is limited by production speed of oldest job * irrespective of the speed of concurrent (and newer) jobs. */ -ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); +ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); /*===== Advanced Streaming decompression functions =====*/ @@ -2203,7 +2321,7 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); * note: no dictionary will be used if dict == NULL or dictSize < 8 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); +ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /*! * This function is deprecated, and is equivalent to: @@ -2214,7 +2332,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic * note : ddict is referenced, it must outlive decompression session * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); +ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /*! * This function is deprecated, and is equivalent to: @@ -2224,7 +2342,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDi * re-use decompression parameters from previous init; saves dictionary loading * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); +ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /* ******************************************************************* @@ -2243,8 +2361,7 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); ZSTD_CCtx object can be re-used multiple times within successive compression operations. Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() Then, consume your input using ZSTD_compressContinue(). 
@@ -2267,17 +2384,19 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); */ /*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ - -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ +ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. 
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ /* Buffer-less streaming decompression (synchronous mode) @@ -2368,24 +2487,24 @@ typedef struct { * @return : 0, `zfhPtr` is correctly filled, * >0, `srcSize` is too small, value is wanted `srcSize` amount, * or an error code, which can be tested using ZSTD_isError() */ -ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ +ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ /*! ZSTD_getFrameHeader_advanced() : * same as ZSTD_getFrameHeader(), * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); /* misc */ -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); +ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); @@ -2422,10 +2541,10 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); */ /*===== Raw zstd block functions =====*/ -ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. 
*/ +ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h index 28248abe8612a..feef3a1b1d600 100644 --- a/lib/zstd/common/bitstream.h +++ b/lib/zstd/common/bitstream.h @@ -313,7 +313,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c U32 const regMask = sizeof(bitContainer)*8 - 1; /* if start > regMask, bitstream is corrupted, and result is undefined */ assert(nbBits < BIT_MASK_SIZE); + /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better + * than accessing memory. When bmi2 instruction is not present, we consider + * such cpus old (pre-Haswell, 2013) and their performance is not of that + * importance. + */ +#if defined(__x86_64__) || defined(_M_X86) + return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); +#else return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +#endif } MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h index f5a9c70a228a2..c42d39faf9bd8 100644 --- a/lib/zstd/common/compiler.h +++ b/lib/zstd/common/compiler.h @@ -11,6 +11,8 @@ #ifndef ZSTD_COMPILER_H #define ZSTD_COMPILER_H +#include "portability_macros.h" + /*-******************************************************* * Compiler specifics *********************************************************/ @@ -34,7 +36,7 @@ /* On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). - This explictly marks such functions as __cdecl so that the code will still compile + This explicitly marks such functions as __cdecl so that the code will still compile if a CC other than __cdecl has been made the default. */ #define WIN_CDECL @@ -70,25 +72,13 @@ /* target attribute */ -#ifndef __has_attribute - #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ -#endif #define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) -/* Enable runtime BMI2 dispatch based on the CPU. - * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. +/* Target attribute for BMI2 dynamic dispatch. + * Enable lzcnt, bmi, and bmi2. + * We test for bmi1 & bmi2. lzcnt is included in bmi1. 
*/ -#ifndef DYNAMIC_BMI2 - #if ((defined(__clang__) && __has_attribute(__target__)) \ - || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ - && (defined(__x86_64__) || defined(_M_X86)) \ - && !defined(__BMI2__) - # define DYNAMIC_BMI2 1 - #else - # define DYNAMIC_BMI2 0 - #endif -#endif +#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2") /* prefetch * can be disabled, by declaring NO_PREFETCH build macro */ @@ -115,8 +105,9 @@ } /* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax, + * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__) # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) # else @@ -134,20 +125,18 @@ #define LIKELY(x) (__builtin_expect((x), 1)) #define UNLIKELY(x) (__builtin_expect((x), 0)) +#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) +# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); } +#else +# define ZSTD_UNREACHABLE { assert(0); } +#endif + /* disable warnings */ /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ -/* compat. with non-clang compilers */ -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -/* compat. with non-clang compilers */ -#ifndef __has_feature -# define __has_feature(x) 0 -#endif +/* compile time determination of SIMD support */ /* C-language Attributes are added in C23. */ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute) @@ -168,10 +157,28 @@ */ #define ZSTD_FALLTHROUGH fallthrough -/* detects whether we are being compiled under msan */ +/*-************************************************************** +* Alignment check +*****************************************************************/ + +/* this test was initially positioned in mem.h, + * but this file is removed (or replaced) for linux kernel + * so it's now hosted in compiler.h, + * which remains valid for both user & kernel spaces. + */ + +#ifndef ZSTD_ALIGNOF +/* covers gcc, clang & MSVC */ +/* note : this section must come first, before C11, + * due to a limitation in the kernel source generator */ +# define ZSTD_ALIGNOF(T) __alignof(T) + +#endif /* ZSTD_ALIGNOF */ +/*-************************************************************** +* Sanitizer +*****************************************************************/ -/* detects whether we are being compiled under asan */ #endif /* ZSTD_COMPILER_H */ diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c index 6353249de614c..fef67056f0524 100644 --- a/lib/zstd/common/entropy_common.c +++ b/lib/zstd/common/entropy_common.c @@ -212,7 +212,7 @@ static size_t FSE_readNCount_body_default( } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( +BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2( short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize) { @@ -240,6 +240,7 @@ size_t FSE_readNCount( return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); } + /*! 
HUF_readStats() :
    Read compact Huffman tree, saved by HUF_writeCTable().
    `huffWeight` is destination buffer.
@@ -293,7 +294,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     { U32 n; for (n=0; n<oSize; n++) {
-        if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+        if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
         rankStats[huffWeight[n]]++;
         weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
@@ -331,7 +332,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
 }
 
 #if DYNAMIC_BMI2
-static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                      U32* nbSymbolsPtr, U32* tableLogPtr,
                      const void* src, size_t srcSize,
                      void* workSpace, size_t wkspSize)
diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h
index d14e686adf951..ca5101e542faa 100644
--- a/lib/zstd/common/error_private.h
+++ b/lib/zstd/common/error_private.h
@@ -18,8 +18,10 @@
 /* ****************************************
 * Dependencies
 ******************************************/
-#include "zstd_deps.h" /* size_t */
 #include <linux/zstd_errors.h> /* enum list */
+#include "compiler.h"
+#include "debug.h"
+#include "zstd_deps.h" /* size_t */
 
 /* ****************************************
@@ -62,5 +64,82 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
     return ERR_getErrorString(ERR_getErrorCode(code));
 }
 
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+  (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+#define ERR_QUOTE(str) #str
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */ +#define FORWARD_IF_ERROR(err, ...) \ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + #endif /* ERROR_H_MODULE */ diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h index 0bb174c2c3677..4507043b2287c 100644 --- a/lib/zstd/common/fse.h +++ b/lib/zstd/common/fse.h @@ -333,8 +333,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); /* FSE_buildCTable_wksp() : * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + * See FSE_buildCTable_wksp() for breakdown of workspace usage. */ -#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */) #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c index 2c8bbe3e4c148..a0d06095be83d 100644 --- a/lib/zstd/common/fse_decompress.c +++ b/lib/zstd/common/fse_decompress.c @@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) { return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); } diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h index 88c5586646aa5..5042ff8703087 100644 --- a/lib/zstd/common/huf.h +++ b/lib/zstd/common/huf.h @@ -86,9 +86,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, /* HUF_compress4X_wksp() : * Same as HUF_compress2(), but uses externally allocated `workSpace`. 
- * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) -#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) + * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) +#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, @@ -113,11 +113,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* *** Constants *** */ -#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ #define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ #define HUF_SYMBOLVALUE_MAX 255 -#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) # error "HUF_TABLELOG_MAX is too large !" #endif @@ -133,15 +133,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* static allocation of HUF's Compression Table */ /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ -struct HUF_CElt_s { - U16 val; - BYTE nbBits; -}; /* typedef'd to HUF_CElt */ -typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ -#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ -#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +typedef size_t HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t)) #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */ /* static allocation of HUF's DTable */ typedef U32 HUF_DTable; @@ -191,6 +187,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); @@ -203,12 +200,13 @@ typedef enum { * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != 
HUF_repeat_none. * If it uses hufTable it does not modify hufTable or repeat. * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ + * If preferRepeat then the old table will always be used if valid. + * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); /* HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. @@ -246,11 +244,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, * Loading a CTable saved with HUF_writeCTable() */ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); -/* HUF_getNbBits() : +/* HUF_getNbBitsFromCTable() : * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX - * Note 1 : is not inlined, as HUF_CElt definition is private - * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ -U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + * Note 1 : is not inlined, as HUF_CElt definition is private */ +U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); /* * HUF_decompress() does the following: @@ -302,18 +299,20 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c /* ====================== */ size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); /* HUF_compress1X_repeat() : * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * If it uses hufTable it does not modify hufTable or repeat. * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ + * If preferRepeat then the old table will always be used if valid. 
+ * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ #ifndef HUF_FORCE_DECOMPRESS_X1 @@ -351,6 +350,9 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds #ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); #endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif #endif /* HUF_STATIC_LINKING_ONLY */ diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h index dcdd586a9fd91..1d9cc03924ca9 100644 --- a/lib/zstd/common/mem.h +++ b/lib/zstd/common/mem.h @@ -30,6 +30,8 @@ * Basic Types *****************************************************************/ typedef uint8_t BYTE; +typedef uint8_t U8; +typedef int8_t S8; typedef uint16_t U16; typedef int16_t S16; typedef uint32_t U32; diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h new file mode 100644 index 0000000000000..0e3b2c0a527db --- /dev/null +++ b/lib/zstd/common/portability_macros.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_PORTABILITY_MACROS_H +#define ZSTD_PORTABILITY_MACROS_H + +/* + * This header file contains macro defintions to support portability. + * This header is shared between C and ASM code, so it MUST only + * contain macro definitions. It MUST not contain any C code. + * + * This header ONLY defines macros to detect platforms/feature support. + * + */ + + +/* compat. with non-clang compilers */ +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ + +/* detects whether we are being compiled under asan */ + +/* detects whether we are being compiled under dfsan */ + +/* Mark the internal assembly functions as hidden */ +#ifdef __ELF__ +# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func +#else +# define ZSTD_HIDE_ASM_FUNCTION(func) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X64)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* + * Only enable assembly for GNUC comptabile compilers, + * because other platforms may not support GAS assembly syntax. + * + * Only enable assembly for Linux / MacOS, other platforms may + * work, but they haven't been tested. This could likely be + * extended to BSD systems. + * + * Disable assembly when MSAN is enabled, because MSAN requires + * 100% of code to be instrumented to work. + */ +#define ZSTD_ASM_SUPPORTED 1 + +/* + * Determines whether we should enable assembly for x86-64 + * with BMI2. + * + * Enable if all of the following conditions hold: + * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM + * - Assembly is supported + * - We are compiling for x86-64 and either: + * - DYNAMIC_BMI2 is enabled + * - BMI2 is supported at compile time + */ +#define ZSTD_ENABLE_ASM_X86_64_BMI2 0 + +#endif /* ZSTD_PORTABILITY_MACROS_H */ diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h index fc6f3a9b40c07..93305d9b41bba 100644 --- a/lib/zstd/common/zstd_internal.h +++ b/lib/zstd/common/zstd_internal.h @@ -20,6 +20,7 @@ * Dependencies ***************************************/ #include "compiler.h" +#include "cpu.h" #include "mem.h" #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ #include "error_private.h" @@ -47,81 +48,7 @@ #undef MAX #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) - -/* - * Ignore: this is an internal helper. - * - * This is a helper function to help force C99-correctness during compilation. - * Under strict compilation modes, variadic macro arguments can't be empty. - * However, variadic function arguments can be. Using a function therefore lets - * us statically check that at least one (string) argument was passed, - * independent of the compilation flags. - */ -static INLINE_KEYWORD UNUSED_ATTR -void _force_has_format_string(const char *format, ...) { - (void)format; -} - -/* - * Ignore: this is an internal helper. - * - * We want to force this function invocation to be syntactically correct, but - * we don't want to force runtime evaluation of its arguments. - */ -#define _FORCE_HAS_FORMAT_STRING(...) \ - if (0) { \ - _force_has_format_string(__VA_ARGS__); \ - } - -/* - * Return the specified error if the condition evaluates to true. - * - * In debug modes, prints additional information. - * In order to do that (particularly, printing the conditional that failed), - * this can't just wrap RETURN_ERROR(). - */ -#define RETURN_ERROR_IF(cond, err, ...) \ - if (cond) { \ - RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } - -/* - * Unconditionally return the specified error. - * - * In debug modes, prints additional information. - */ -#define RETURN_ERROR(err, ...) 
\ - do { \ - RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } while(0); - -/* - * If the provided expression evaluates to an error code, returns that error code. - * - * In debug modes, prints additional information. - */ -#define FORWARD_IF_ERROR(err, ...) \ - do { \ - size_t const err_code = (err); \ - if (ERR_isError(err_code)) { \ - RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return err_code; \ - } \ - } while(0); +#define BOUNDED(min,val,max) (MAX(min,MIN(val,max))) /*-************************************* @@ -130,7 +57,6 @@ void _force_has_format_string(const char *format, ...) { #define ZSTD_OPT_NUM (1<<12) #define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) @@ -182,7 +108,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy /* Each table cannot take more than #symbols * FSELog bits */ #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) -static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = { +static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, @@ -199,7 +125,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = { #define LL_DEFAULTNORMLOG 6 /* for static allocation */ static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; -static UNUSED_ATTR const U32 ML_bits[MaxML+1] = { +static UNUSED_ATTR const U8 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -234,12 +160,31 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; * Shared functions to include for inlining *********************************************/ static void ZSTD_copy8(void* dst, const void* src) { +#if defined(ZSTD_ARCH_ARM_NEON) + vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); +#else ZSTD_memcpy(dst, src, 8); +#endif } - #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } + +/* Need to use memmove here since the literal buffer can now be located within + the dst buffer. In circumstances where the op "catches up" to where the + literal buffer is, there can be partial overlaps in this call on the final + copy if the literal is being shifted by less than 16 bytes. 
*/ static void ZSTD_copy16(void* dst, const void* src) { - ZSTD_memcpy(dst, src, 16); +#if defined(ZSTD_ARCH_ARM_NEON) + vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); +#elif defined(ZSTD_ARCH_X86_SSE2) + _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src)); +#elif defined(__clang__) + ZSTD_memmove(dst, src, 16); +#else + /* ZSTD_memmove is not inlined properly by gcc */ + BYTE copy16_buf[16]; + ZSTD_memcpy(copy16_buf, src, 16); + ZSTD_memcpy(dst, copy16_buf, 16); +#endif } #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } @@ -267,8 +212,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); - if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { /* Handle short offset copies. */ do { @@ -331,11 +274,18 @@ typedef enum { * Private declarations *********************************************/ typedef struct seqDef_s { - U32 offset; /* Offset code of the sequence */ + U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ U16 litLength; - U16 matchLength; + U16 mlBase; /* mlBase == matchLength - MINMATCH */ } seqDef; +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + typedef struct { seqDef* sequencesStart; seqDef* sequences; /* ptr to end of sequences */ @@ -347,12 +297,12 @@ typedef struct { size_t maxNbSeq; size_t maxNbLit; - /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment * the existing value of the litLength or matchLength by 0x10000. */ - U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ - U32 longLengthPos; /* Index of the sequence to apply long length modification to */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ } seqStore_t; typedef struct { @@ -362,18 +312,18 @@ typedef struct { /* * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences - * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. */ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) { ZSTD_sequenceLength seqLen; seqLen.litLength = seq->litLength; - seqLen.matchLength = seq->matchLength + MINMATCH; + seqLen.matchLength = seq->mlBase + MINMATCH; if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { - if (seqStore->longLengthID == 1) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { seqLen.litLength += 0xFFFF; } - if (seqStore->longLengthID == 2) { + if (seqStore->longLengthType == ZSTD_llt_matchLength) { seqLen.matchLength += 0xFFFF; } } @@ -419,6 +369,41 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus } } +/* + * Counts the number of trailing zeros of a `size_t`. 
+ * Most compilers should support CTZ as a builtin. A backup + * implementation is provided if the builtin isn't supported, but + * it may not be terribly efficient. + */ +MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) +{ + if (MEM_64bits()) { +# if (__GNUC__ >= 4) + return __builtin_ctzll((U64)val); +# else + static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, + 4, 25, 14, 28, 9, 34, 20, 56, + 5, 17, 26, 54, 15, 41, 29, 43, + 10, 31, 38, 35, 21, 45, 49, 57, + 63, 6, 12, 18, 24, 27, 33, 55, + 16, 53, 40, 42, 30, 37, 44, 48, + 62, 11, 23, 32, 52, 39, 36, 47, + 61, 22, 51, 46, 60, 50, 59, 58 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if (__GNUC__ >= 3) + return __builtin_ctz((U32)val); +# else + static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, + 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, + 26, 12, 18, 6, 11, 5, 10, 9 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } +} + /* ZSTD_invalidateRepCodes() : * ensures next compression will not use repcodes from previous block. @@ -445,6 +430,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize); +/* + * @returns true iff the CPU supports dynamic BMI2 dispatch. + */ +MEM_STATIC int ZSTD_cpuSupportsBmi2(void) +{ + ZSTD_cpuid_t cpuid = ZSTD_cpuid(); + return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); +} #endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/lib/zstd/compress/clevels.h b/lib/zstd/compress/clevels.h new file mode 100644 index 0000000000000..d9a76112ec3af --- /dev/null +++ b/lib/zstd/compress/clevels.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_CLEVELS_H +#define ZSTD_CLEVELS_H + +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#include + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 + +__attribute__((__unused__)) + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */ + { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */ + { 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 
16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; + + + +#endif /* ZSTD_CLEVELS_H */ diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c index 436985b620e51..ec5b1ca6d71af 100644 --- a/lib/zstd/compress/fse_compress.c +++ b/lib/zstd/compress/fse_compress.c @@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? 
tableSize>>1 : 1) ;
 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
 U32 const step = FSE_TABLESTEP(tableSize);
+ U32 const maxSV1 = maxSymbolValue+1;
- U32* cumul = (U32*)workSpace;
- FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+ U16* cumul = (U16*)workSpace; /* size = maxSV1 */
+ FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
 U32 highThreshold = tableSize-1;
- if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+ assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
 if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
 /* CTable header */
 tableU16[-2] = (U16) tableLog;
@@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
 /* symbol start positions */
 { U32 u;
 cumul[0] = 0;
- for (u=1; u <= maxSymbolValue+1; u++) {
+ for (u=1; u <= maxSV1; u++) {
 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
 cumul[u] = cumul[u-1] + 1;
 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
 } else {
- cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+ assert(normalizedCounter[u-1] >= 0);
+ cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+ assert(cumul[u] >= cumul[u-1]); /* no overflow */
 } }
- cumul[maxSymbolValue+1] = tableSize+1;
+ cumul[maxSV1] = (U16)(tableSize+1);
 }
 /* Spread symbols */
- { U32 position = 0;
+ if (highThreshold == tableSize - 1) {
+ /* Case for no low prob count symbols. Lay down 8 bytes at a time
+ * to reduce branch misses since we are operating on a small block
+ */
+ BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
+ { U64 const add = 0x0101010101010101ull;
+ size_t pos = 0;
+ U64 sv = 0;
+ U32 s;
+ for (s=0; s<maxSV1; ++s, sv += add) {
+ int i;
+ int const n = normalizedCounter[s];
+ MEM_write64(spread + pos, sv);
+ for (i = 8; i < n; i += 8) {
+ MEM_write64(spread + pos + i, sv);
+ }
+ assert(n>=0);
+ pos += (size_t)n;
+ }
+ }
+ /* Spread symbols across the table. Lack of lowprob symbols means that
+ * we don't need variable sized inner loop, so we can unroll the loop and
+ * reduce branch misses.
+ */
+ { size_t position = 0;
+ size_t s;
+ size_t const unroll = 2; /* Experimentally determined optimal unroll */
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
+ size_t u;
+ for (u = 0; u < unroll; ++u) {
+ size_t const uPosition = (position + (u * step)) & tableMask;
+ tableSymbol[uPosition] = spread[s + u];
+ }
+ position = (position + (unroll * step)) & tableMask;
+ }
+ assert(position == 0); /* Must have initialized all positions */
+ }
+ } else {
+ U32 position = 0;
 U32 symbol;
- for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+ for (symbol=0; symbol<maxSV1; symbol++) {
 int nbOccurrences;
 int const freq = normalizedCounter[symbol];
 for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
 position = (position + step) & tableMask;
 while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */
 } }
- assert(position==0); /* Must have initialized all positions */
 }
@@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
 case -1:
 case 1:
 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
- symbolTT[s].deltaFindState = total - 1;
+ assert(total <= INT_MAX);
+ symbolTT[s].deltaFindState = (int)(total - 1);
 total ++;
 break;
 default :
- {
- U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
- U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+ assert(normalizedCounter[s] > 1);
+ { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
+ U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
- symbolTT[s].deltaFindState = total - normalizedCounter[s];
- total += normalizedCounter[s];
+ symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+ total += (unsigned)normalizedCounter[s];
 } } }
 #if 0 /* debug : symbol costs */
@@ -164,8 +206,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
 symbol, normalizedCounter[symbol],
 FSE_getMaxNbBits(symbolTT, symbol),
 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
- }
- }
+ } }
 #endif
 return 0;
@@ -173,16 +214,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
-
 #ifndef FSE_COMMONDEFS_ONLY
-
 /*-**************************************************************
 * FSE NCount encoding
 ****************************************************************/
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
 {
- size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+ size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+ + 4 /* bitCount initialized at 4 */
+ + 2 /* first two symbols may use one additional bit each */) / 8)
+ + 1 /* round up to whole nb bytes */
+ + 2 /* additional two bytes for bitstream flush */;
 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
 }
diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c
index f76a526bfa54b..74ef0db476210 100644
--- a/lib/zstd/compress/huf_compress.c
+++ b/lib/zstd/compress/huf_compress.c
@@ -50,6 +50,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 /* *******************************************************
* HUF : Huffman block compression
*********************************************************/
+#define HUF_WORKSPACE_MAX_ALIGNMENT 8
+
+static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
+{
+ size_t const mask = align - 1;
+ size_t const rem = (size_t)workspace & mask;
+ size_t const add = (align - rem) & mask;
+ BYTE* const aligned = (BYTE*)workspace + add;
+ assert((align & (align - 1)) == 0); /* pow 2 */
+ assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
+ if (*workspaceSizePtr >= add) {
+ assert(add < align);
+ assert(((size_t)aligned & mask) == 0);
+ *workspaceSizePtr -= add;
+ return aligned;
+ } else {
+ *workspaceSizePtr = 0;
+ return NULL;
+ }
+}
+
+
 /* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
* The use case needs much less stack memory. @@ -72,7 +94,7 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT unsigned maxSymbolValue = HUF_TABLELOG_MAX; U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; - HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace; + HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC); @@ -103,6 +125,40 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT return (size_t)(op-ostart); } +static size_t HUF_getNbBits(HUF_CElt elt) +{ + return elt & 0xFF; +} + +static size_t HUF_getNbBitsFast(HUF_CElt elt) +{ + return elt; +} + +static size_t HUF_getValue(HUF_CElt elt) +{ + return elt & ~0xFF; +} + +static size_t HUF_getValueFast(HUF_CElt elt) +{ + return elt; +} + +static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits) +{ + assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX); + *elt = nbBits; +} + +static void HUF_setValue(HUF_CElt* elt, size_t value) +{ + size_t const nbBits = HUF_getNbBits(*elt); + if (nbBits > 0) { + assert((value >> nbBits) == 0); + *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits); + } +} typedef struct { HUF_CompressWeightsWksp wksp; @@ -114,9 +170,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize) { + HUF_CElt const* const ct = CTable + 1; BYTE* op = (BYTE*)dst; U32 n; - HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace; + HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); /* check conditions */ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); @@ -127,9 +184,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, for (n=1; nbitsToWeight[n] = (BYTE)(huffLog + 1 - n); for (n=0; nhuffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits]; + wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])]; /* attempt weights compression by FSE */ + if (maxDstSize < 1) return ERROR(dstSize_tooSmall); { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) ); if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ op[0] = (BYTE)hSize; @@ -163,6 +221,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; U32 nbSymbols = 0; + HUF_CElt* const ct = CTable + 1; /* get symbol weights */ CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); @@ -172,6 +231,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + CTable[0] = tableLog; + /* Prepare base value per rank */ { U32 n, nextRankStart = 0; for (n=1; n<=tableLog; n++) { @@ -183,13 +244,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void /* fill nbBits */ { U32 n; for (n=0; nn=tableLog+1 */ U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; - { U32 n; for (n=0; n>= 1; } } /* assign value within rank, symbol order */ - { U32 n; for (n=0; n huffNode[i-1].count) { + return 0; + } + } + return 1; +} + +/* 
Insertion sort by descending order */ +HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) { + int i; + int const size = high-low+1; + huffNode += low; + for (i = 1; i < size; ++i) { + nodeElt const key = huffNode[i]; + int j = i - 1; + while (j >= 0 && huffNode[j].count < key.count) { + huffNode[j + 1] = huffNode[j]; + j--; + } + huffNode[j + 1] = key; + } +} + +/* Pivot helper function for quicksort. */ +static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) { + /* Simply select rightmost element as pivot. "Better" selectors like + * median-of-three don't experimentally appear to have any benefit. + */ + U32 const pivot = arr[high].count; + int i = low - 1; + int j = low; + for ( ; j < high; j++) { + if (arr[j].count > pivot) { + i++; + HUF_swapNodes(&arr[i], &arr[j]); + } + } + HUF_swapNodes(&arr[i + 1], &arr[high]); + return i + 1; +} + +/* Classic quicksort by descending with partially iterative calls + * to reduce worst case callstack size. + */ +static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) { + int const kInsertionSortThreshold = 8; + if (high - low < kInsertionSortThreshold) { + HUF_insertionSort(arr, low, high); + return; + } + while (low < high) { + int const idx = HUF_quickSortPartition(arr, low, high); + if (idx - low < high - idx) { + HUF_simpleQuickSort(arr, low, idx - 1); + low = idx + 1; + } else { + HUF_simpleQuickSort(arr, idx + 1, high); + high = idx - 1; + } + } +} + /* * HUF_sort(): * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket. * * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. * Must have (maxSymbolValue + 1) entries. @@ -387,44 +544,52 @@ typedef struct { * @param[in] maxSymbolValue Maximum symbol value. * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. */ -static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) -{ - int n; - int const maxSymbolValue1 = (int)maxSymbolValue + 1; +static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) { + U32 n; + U32 const maxSymbolValue1 = maxSymbolValue+1; /* Compute base and set curr to base. - * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. - * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1. + * See HUF_getIndex to see bucketing strategy. * We attribute each symbol to lowerRank's base value, because we want to know where * each rank begins in the output, so for rank R we want to count ranks R+1 and above. */ ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); for (n = 0; n < maxSymbolValue1; ++n) { - U32 lowerRank = BIT_highbit32(count[n] + 1); + U32 lowerRank = HUF_getIndex(count[n]); + assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1); rankPosition[lowerRank].base++; } + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + /* Set up the rankPosition table */ for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { rankPosition[n-1].base += rankPosition[n].base; rankPosition[n-1].curr = rankPosition[n-1].base; } - /* Sort */ + + /* Insert each symbol into their appropriate bucket, setting up rankPosition table. 
*/
 for (n = 0; n < maxSymbolValue1; ++n) {
 U32 const c = count[n];
- U32 const r = BIT_highbit32(c+1) + 1;
- U32 pos = rankPosition[r].curr++;
- /* Insert into the correct position in the rank.
- * We have at most 256 symbols, so this insertion should be fine.
- */
- while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
- huffNode[pos] = huffNode[pos-1];
- pos--;
- }
+ U32 const r = HUF_getIndex(c) + 1;
+ U32 const pos = rankPosition[r].curr++;
+ assert(pos < maxSymbolValue1);
 huffNode[pos].count = c;
 huffNode[pos].byte = (BYTE)n;
 }
-}
+ /* Sort each bucket. */
+ for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
+ U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
+ U32 const bucketStartIdx = rankPosition[n].base;
+ if (bucketSize > 1) {
+ assert(bucketStartIdx < maxSymbolValue1);
+ HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
+ }
+ }
+
+ assert(HUF_isSorted(huffNode, maxSymbolValue1));
+}
 
 /* HUF_buildCTable_wksp() :
 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
@@ -487,6 +652,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
 */
 static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
 {
+ HUF_CElt* const ct = CTable + 1;
 /* fill result into ctable (val, nbBits) */
 int n;
 U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@@ -502,20 +668,20 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
 min >>= 1;
 } }
 for (n=0; n<alphabetSize; n++)
- CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
+ HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
 for (n=0; n<alphabetSize; n++)
- CTable[n].val = valPerRank[CTable[n].nbBits]++; /* assign value within rank, symbol order */
+ HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
+ CTable[0] = maxNbBits;
 }
 
-size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
 void* workSpace, size_t wkspSize)
 {
- HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+ HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
 nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
 nodeElt* const huffNode = huffNode0+1;
 int nonNullRank;
 
 /* safety checks */
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
 if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return ERROR(workSpace_tooSmall);
 if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
@@ -533,99 +699,334 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
 maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
 if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
- HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+ HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
 return maxNbBits;
 }
 
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
 {
+ HUF_CElt const* ct = CTable + 1;
 size_t nbBits = 0;
 int s;
 for (s = 0; s <= (int)maxSymbolValue; ++s) {
- nbBits += CTable[s].nbBits * count[s];
+ nbBits += HUF_getNbBits(ct[s]) * count[s];
 }
 return nbBits >> 3;
 }
 
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+ HUF_CElt const* ct = CTable + 1;
 int bad = 0;
 int s;
 for (s = 0; s <= (int)maxSymbolValue; ++s) {
- bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
 }
 return !bad;
 }
 
 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
 
+/* HUF_CStream_t:
+ * Huffman uses its own BIT_CStream_t implementation.
+ * There are three major differences from BIT_CStream_t:
+ * 1. HUF_addBits() takes a HUF_CElt (size_t) which is
+ * the pair (nbBits, value) in the format:
+ * format:
+ * - Bits [0, 4) = nbBits
+ * - Bits [4, 64 - nbBits) = 0
+ * - Bits [64 - nbBits, 64) = value
+ * 2. The bitContainer is built from the upper bits and
+ * right shifted. E.g.
to add a new value of N bits + * you right shift the bitContainer by N, then or in + * the new value into the N upper bits. + * 3. The bitstream has two bit containers. You can add + * bits to the second container and merge them into + * the first container. + */ + +#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8) + +typedef struct { + size_t bitContainer[2]; + size_t bitPos[2]; + + BYTE* startPtr; + BYTE* ptr; + BYTE* endPtr; +} HUF_CStream_t; + +/*! HUF_initCStream(): + * Initializes the bitstream. + * @returns 0 or an error code. + */ +static size_t HUF_initCStream(HUF_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + ZSTD_memset(bitC, 0, sizeof(*bitC)); + bitC->startPtr = (BYTE*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]); + if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! HUF_addBits(): + * Adds the symbol stored in HUF_CElt elt to the bitstream. + * + * @param elt The element we're adding. This is a (nbBits, value) pair. + * See the HUF_CStream_t docs for the format. + * @param idx Insert into the bitstream at this idx. + * @param kFast This is a template parameter. If the bitstream is guaranteed + * to have at least 4 unused bits after this call it may be 1, + * otherwise it must be 0. HUF_addBits() is faster when fast is set. + */ +FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast) +{ + assert(idx <= 1); + assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX); + /* This is efficient on x86-64 with BMI2 because shrx + * only reads the low 6 bits of the register. The compiler + * knows this and elides the mask. When fast is set, + * every operation can use the same value loaded from elt. + */ + bitC->bitContainer[idx] >>= HUF_getNbBits(elt); + bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt); + /* We only read the low 8 bits of bitC->bitPos[idx] so it + * doesn't matter that the high bits have noise from the value. + */ + bitC->bitPos[idx] += HUF_getNbBitsFast(elt); + assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER); + /* The last 4-bits of elt are dirty if fast is set, + * so we must not be overwriting bits that have already been + * inserted into the bit container. + */ +#if DEBUGLEVEL >= 1 + { + size_t const nbBits = HUF_getNbBits(elt); + size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1; + (void)dirtyBits; + /* Middle bits are 0. */ + assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); + /* We didn't overwrite any bits in the bit container. */ + assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER); + (void)dirtyBits; + } +#endif +} + +FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC) +{ + bitC->bitContainer[1] = 0; + bitC->bitPos[1] = 0; +} + +/*! HUF_mergeIndex1() : + * Merges the bit container @ index 1 into the bit container @ index 0 + * and zeros the bit container @ index 1. + */ +FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC) +{ + assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER); + bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF); + bitC->bitContainer[0] |= bitC->bitContainer[1]; + bitC->bitPos[0] += bitC->bitPos[1]; + assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER); +} + +/*! HUF_flushBits() : +* Flushes the bits in the bit container @ index 0. +* +* @post bitPos will be < 8. 
+* @param kFast If kFast is set then we must know a-priori that +* the bit container will not overflow. +*/ +FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast) +{ + /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */ + size_t const nbBits = bitC->bitPos[0] & 0xFF; + size_t const nbBytes = nbBits >> 3; + /* The top nbBits bits of bitContainer are the ones we need. */ + size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits); + /* Mask bitPos to account for the bytes we consumed. */ + bitC->bitPos[0] &= 7; + assert(nbBits > 0); + assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitContainer); + bitC->ptr += nbBytes; + assert(!kFast || bitC->ptr <= bitC->endPtr); + if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + /* bitContainer doesn't need to be modified because the leftover + * bits are already the top bitPos bits. And we don't care about + * noise in the lower values. + */ +} + +/*! HUF_endMark() + * @returns The Huffman stream end mark: A 1-bit value = 1. + */ +static HUF_CElt HUF_endMark(void) +{ + HUF_CElt endMark; + HUF_setNbBits(&endMark, 1); + HUF_setValue(&endMark, 1); + return endMark; +} + +/*! HUF_closeCStream() : + * @return Size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +static size_t HUF_closeCStream(HUF_CStream_t* bitC) +{ + HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0); + HUF_flushBits(bitC, /* kFast */ 0); + { + size_t const nbBits = bitC->bitPos[0] & 0xFF; + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (nbBits > 0); + } +} + FORCE_INLINE_TEMPLATE void -HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast) { - BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); + HUF_addBits(bitCPtr, CTable[symbol], idx, fast); } -#define HUF_FLUSHBITS(s) BIT_flushBits(s) +FORCE_INLINE_TEMPLATE void +HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC, + const BYTE* ip, size_t srcSize, + const HUF_CElt* ct, + int kUnroll, int kFastFlush, int kLastFast) +{ + /* Join to kUnroll */ + int n = (int)srcSize; + int rem = n % kUnroll; + if (rem > 0) { + for (; rem > 0; --rem) { + HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0); + } + HUF_flushBits(bitC, kFastFlush); + } + assert(n % kUnroll == 0); + + /* Join to 2 * kUnroll */ + if (n % (2 * kUnroll)) { + int u; + for (u = 1; u < kUnroll; ++u) { + HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1); + } + HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast); + HUF_flushBits(bitC, kFastFlush); + n -= kUnroll; + } + assert(n % (2 * kUnroll) == 0); + + for (; n>0; n-= 2 * kUnroll) { + /* Encode kUnroll symbols into the bitstream @ index 0. */ + int u; + for (u = 1; u < kUnroll; ++u) { + HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1); + } + HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast); + HUF_flushBits(bitC, kFastFlush); + /* Encode kUnroll symbols into the bitstream @ index 1. + * This allows us to start filling the bit container + * without any data dependencies. 
+ */ + HUF_zeroIndex1(bitC); + for (u = 1; u < kUnroll; ++u) { + HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1); + } + HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast); + /* Merge bitstream @ index 1 into the bitstream @ index 0 */ + HUF_mergeIndex1(bitC); + HUF_flushBits(bitC, kFastFlush); + } + assert(n == 0); + +} -#define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) +/* + * Returns a tight upper bound on the output space needed by Huffman + * with 8 bytes buffer to handle over-writes. If the output is at least + * this large we don't need to do bounds checks during Huffman encoding. + */ +static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog) +{ + return ((srcSize * tableLog) >> 3) + 8; +} -#define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) FORCE_INLINE_TEMPLATE size_t HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { + U32 const tableLog = (U32)CTable[0]; + HUF_CElt const* ct = CTable + 1; const BYTE* ip = (const BYTE*) src; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - size_t n; - BIT_CStream_t bitC; + HUF_CStream_t bitC; /* init */ if (dstSize < 8) return 0; /* not enough space to compress */ - { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); if (HUF_isError(initErr)) return 0; } - n = srcSize & ~3; /* join to mod 4 */ - switch (srcSize & 3) - { - case 3: - HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); - HUF_FLUSHBITS_2(&bitC); - ZSTD_FALLTHROUGH; - case 2: - HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); - HUF_FLUSHBITS_1(&bitC); - ZSTD_FALLTHROUGH; - case 1: - HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); - HUF_FLUSHBITS(&bitC); - ZSTD_FALLTHROUGH; - case 0: ZSTD_FALLTHROUGH; - default: break; - } - - for (; n>0; n-=4) { /* note : n&3==0 at this stage */ - HUF_encodeSymbol(&bitC, ip[n- 1], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 2], CTable); - HUF_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 3], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 4], CTable); - HUF_FLUSHBITS(&bitC); - } - - return BIT_closeCStream(&bitC); + if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11) + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 
2 : 4, /* kFast */ 0, /* kLastFast */ 0); + else { + if (MEM_32bits()) { + switch (tableLog) { + case 11: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 10: ZSTD_FALLTHROUGH; + case 9: ZSTD_FALLTHROUGH; + case 8: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + case 7: ZSTD_FALLTHROUGH; + default: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + } + } else { + switch (tableLog) { + case 11: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 10: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + case 9: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 8: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 7: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 6: ZSTD_FALLTHROUGH; + default: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + } + } + } + assert(bitC.ptr <= bitC.endPtr); + + return HUF_closeCStream(&bitC); } #if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) @@ -667,9 +1068,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { - return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); + return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); } +size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); +} static size_t HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, @@ -689,8 +1094,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, assert(op <= oend); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); + if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart, (U16)cSize); op += cSize; } @@ -698,8 +1102,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); + if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart+2, (U16)cSize); op += cSize; } @@ -707,8 +1110,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, 
CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); + if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart+4, (U16)cSize); op += cSize; } @@ -717,7 +1119,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, assert(op <= oend); assert(ip <= iend); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); - if (cSize==0) return 0; + if (cSize == 0 || cSize > 65535) return 0; op += cSize; } @@ -726,7 +1128,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { - return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); + return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + +size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); } typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; @@ -750,35 +1157,38 @@ static size_t HUF_compressCTable_internal( typedef struct { unsigned count[HUF_SYMBOLVALUE_MAX + 1]; - HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)]; union { HUF_buildCTable_wksp_tables buildCTable_wksp; HUF_WriteCTableWksp writeCTable_wksp; + U32 hist_wksp[HIST_WKSP_SIZE_U32]; } wksps; } HUF_compress_tables_t; +#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 +#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ + /* HUF_compress_internal() : * `workSpace_align4` must be aligned on 4-bytes boundaries, - * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ static size_t HUF_compress_internal (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, HUF_nbStreams_e nbStreams, - void* workSpace_align4, size_t wkspSize, + void* workSpace, size_t wkspSize, HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, - const int bmi2) + const int bmi2, unsigned suspectUncompressible) { - HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); - assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); /* checks & inits */ - if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); if (!srcSize) return 0; /* Uncompressed */ if (!dstSize) return 0; /* cannot fit anything within dst budget */ if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ @@ -794,8 +1204,23 @@ HUF_compress_internal (void* dst, size_t dstSize, nbStreams, oldHufTable, bmi2); } + /* If uncompressible data is suspected, do a smaller sampling first */ + DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); + if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * 
SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { + size_t largestTotal = 0; + { unsigned maxSymbolValueBegin = maxSymbolValue; + CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); + largestTotal += largestBegin; + } + { unsigned maxSymbolValueEnd = maxSymbolValue; + CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); + largestTotal += largestEnd; + } + if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + /* Scan input and build symbol stats */ - { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) ); if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ } @@ -820,9 +1245,12 @@ HUF_compress_internal (void* dst, size_t dstSize, &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); CHECK_F(maxBits); huffLog = (U32)maxBits; - /* Zero unused symbols in CTable, so we can check it for validity */ - ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, - sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + /* Zero unused symbols in CTable, so we can check it for validity */ + { + size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue); + size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt); + ZSTD_memset(table->CTable + ctableSize, 0, unusedSize); } /* Write table description header */ @@ -859,19 +1287,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize, return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_singleStream, workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); + NULL, NULL, 0, 0 /*bmi2*/, 0); } size_t HUF_compress1X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, + int bmi2, unsigned suspectUncompressible) { return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_singleStream, workSpace, wkspSize, hufTable, - repeat, preferRepeat, bmi2); + repeat, preferRepeat, bmi2, suspectUncompressible); } /* HUF_compress4X_repeat(): @@ -885,21 +1314,22 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize, return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_fourStreams, workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); + NULL, NULL, 0, 0 /*bmi2*/, 0); } /* HUF_compress4X_repeat(): * compress input using 4 streams. 
+ * consider skipping quickly * re-use an existing huffman compression table */ size_t HUF_compress4X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible) { return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_fourStreams, workSpace, wkspSize, - hufTable, repeat, preferRepeat, bmi2); + hufTable, repeat, preferRepeat, bmi2, suspectUncompressible); } diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c index 73fff4c60149d..f620cafca633b 100644 --- a/lib/zstd/compress/zstd_compress.c +++ b/lib/zstd/compress/zstd_compress.c @@ -12,7 +12,6 @@ * Dependencies ***************************************/ #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ -#include "../common/cpu.h" #include "../common/mem.h" #include "hist.h" /* HIST_countFast_wksp */ #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ @@ -39,6 +38,18 @@ * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. */ +/*! + * ZSTD_HASHLOG3_MAX : + * Maximum size of the hash table dedicated to find 3-bytes matches, + * in log format, aka 17 => 1 << 17 == 128Ki positions. + * This structure is only used in zstd_opt. + * Since allocation is centralized for all strategies, it has to be known here. + * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3, + * so that zstd_opt.c doesn't need to know about this constant. + */ +#ifndef ZSTD_HASHLOG3_MAX +# define ZSTD_HASHLOG3_MAX 17 +#endif /*-************************************* * Helper functions @@ -69,6 +80,10 @@ struct ZSTD_CDict_s { ZSTD_customMem customMem; U32 dictID; int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ + ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use + * row-based matchfinder. Unless the cdict is reloaded, we will use + * the same greedy/lazy matchfinder at compression time. + */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) @@ -81,7 +96,7 @@ static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) assert(cctx != NULL); ZSTD_memset(cctx, 0, sizeof(*cctx)); cctx->customMem = memManager; - cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + cctx->bmi2 = ZSTD_cpuSupportsBmi2(); { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); assert(!ZSTD_isError(err)); (void)err; @@ -192,12 +207,64 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) /* private API call, for dictBuilder only */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } +/* Returns true if the strategy supports using a row based matchfinder */ +static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { + return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2); +} + +/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder + * for this compression. 
+ */ +static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) { + assert(mode != ZSTD_ps_auto); + return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable); +} + +/* Returns row matchfinder usage given an initial mode and cParams */ +static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { +#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON) + int const kHasSIMD128 = 1; +#else + int const kHasSIMD128 = 0; +#endif + if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ + mode = ZSTD_ps_disable; + if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; + if (kHasSIMD128) { + if (cParams->windowLog > 14) mode = ZSTD_ps_enable; + } else { + if (cParams->windowLog > 17) mode = ZSTD_ps_enable; + } + return mode; +} + +/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */ +static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { + if (mode != ZSTD_ps_auto) return mode; + return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable; +} + +/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ +static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, + const ZSTD_paramSwitch_e useRowMatchFinder, + const U32 forDDSDict) { + assert(useRowMatchFinder != ZSTD_ps_auto); + /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. + * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder. + */ + return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); +} + /* Returns 1 if compression parameters are such that we should * enable long distance matching (wlog >= 27, strategy >= btopt). * Returns 0 otherwise. */ -static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { - return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; +static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { + if (mode != ZSTD_ps_auto) return mode; + return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? 
ZSTD_ps_enable : ZSTD_ps_disable; } static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( @@ -208,15 +275,15 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); cctxParams.cParams = cParams; - if (ZSTD_CParams_shouldEnableLdm(&cParams)) { - DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); - cctxParams.ldmParams.enableLdm = 1; - /* LDM is enabled by default for optimal parser and window size >= 128MB */ + /* Adjust advanced params according to cParams */ + cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams); + if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) { ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); assert(cctxParams.ldmParams.hashRateLog < 32); } - + cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams); + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); assert(!ZSTD_checkCParams(cParams)); return cctxParams; } @@ -275,6 +342,11 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par * But, set it for tracing anyway. */ cctxParams->compressionLevel = compressionLevel; + cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams); + cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, ¶ms->cParams); + cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams); + DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d", + cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm); } size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) @@ -431,9 +503,9 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; case ZSTD_c_literalCompressionMode: - ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); - bounds.lowerBound = ZSTD_lcm_auto; - bounds.upperBound = ZSTD_lcm_uncompressed; + ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable); + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; return bounds; case ZSTD_c_targetCBlockSize: @@ -462,6 +534,21 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = 1; return bounds; + case ZSTD_c_useBlockSplitter: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_useRowMatchFinder: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_deterministicRefPrefix: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -523,6 +610,9 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: + case ZSTD_c_useBlockSplitter: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: default: return 0; } @@ -575,6 +665,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: + case 
ZSTD_c_useBlockSplitter: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -672,7 +765,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, } case ZSTD_c_literalCompressionMode : { - const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value; BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); CCtxParams->literalCompressionMode = lcm; return CCtxParams->literalCompressionMode; @@ -699,7 +792,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, return CCtxParams->enableDedicatedDictSearch; case ZSTD_c_enableLongDistanceMatching : - CCtxParams->ldmParams.enableLdm = (value!=0); + CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value; return CCtxParams->ldmParams.enableLdm; case ZSTD_c_ldmHashLog : @@ -758,6 +851,21 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->validateSequences = value; return CCtxParams->validateSequences; + case ZSTD_c_useBlockSplitter: + BOUNDCHECK(ZSTD_c_useBlockSplitter, value); + CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value; + return CCtxParams->useBlockSplitter; + + case ZSTD_c_useRowMatchFinder: + BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); + CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value; + return CCtxParams->useRowMatchFinder; + + case ZSTD_c_deterministicRefPrefix: + BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); + CCtxParams->deterministicRefPrefix = !!value; + return CCtxParams->deterministicRefPrefix; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -863,6 +971,15 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_validateSequences : *value = (int)CCtxParams->validateSequences; break; + case ZSTD_c_useBlockSplitter : + *value = (int)CCtxParams->useBlockSplitter; + break; + case ZSTD_c_useRowMatchFinder : + *value = (int)CCtxParams->useRowMatchFinder; + break; + case ZSTD_c_deterministicRefPrefix: + *value = (int)CCtxParams->deterministicRefPrefix; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -889,7 +1006,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( return 0; } -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, @@ -969,14 +1086,14 @@ size_t ZSTD_CCtx_loadDictionary_advanced( return 0; } -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( +size_t ZSTD_CCtx_loadDictionary_byReference( ZSTD_CCtx* cctx, const void* dict, size_t dictSize) { return ZSTD_CCtx_loadDictionary_advanced( cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); } -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) { return ZSTD_CCtx_loadDictionary_advanced( cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); @@ -1146,7 +1263,7 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, break; case ZSTD_cpm_createCDict: /* Assume a small source size when creating a dictionary - * with an unkown source size. + * with an unknown source size. 
*/ if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) srcSize = minSrcSize; @@ -1220,7 +1337,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( srcSizeHint = CCtxParams->srcSizeHint; } cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); - if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); assert(!ZSTD_checkCParams(cParams)); /* srcSizeHint == 0 means 0 */ @@ -1229,9 +1346,14 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( static size_t ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const ZSTD_paramSwitch_e useRowMatchFinder, + const U32 enableDedicatedDictSearch, const U32 forCCtx) { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + /* chain table size should be 0 for fast or row-hash strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx) + ? ((size_t)1 << cParams->chainLog) + : 0; size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; @@ -1241,43 +1363,53 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + hSize * sizeof(U32) + h3Size * sizeof(U32); size_t const optPotentialSpace = - ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) - + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) + + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder) + ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) + : 0; size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) ? optPotentialSpace : 0; + size_t const slackSpace = ZSTD_cwksp_slack_space_required(); + + /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */ + ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4); + assert(useRowMatchFinder != ZSTD_ps_auto); + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", (U32)chainSize, (U32)hSize, (U32)h3Size); - return tableSpace + optSpace; + return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; } static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_compressionParameters* cParams, const ldmParams_t* ldmParams, const int isStatic, + const ZSTD_paramSwitch_e useRowMatchFinder, const size_t buffInSize, const size_t buffOutSize, const U64 pledgedSrcSize) { - size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize)); + size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); U32 const divider = (cParams->minMatch==3) ? 
3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); - size_t const ldmSeqSpace = ldmParams->enableLdm ? - ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ? + ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) @@ -1303,19 +1435,32 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, + &cParams); RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); /* estimateCCtxSize is for one-shot compression. So no buffers should * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. */ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); + &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) { - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCCtxSize_usingCCtxParams(&params); + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_ps_disable; + noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_ps_enable; + rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + } } static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) @@ -1355,17 +1500,29 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) ? 
ZSTD_compressBound(blockSize) + 1 : 0; + ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, ¶ms->cParams); return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, ¶ms->ldmParams, 1, inBuffSize, outBuffSize, + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, ZSTD_CONTENTSIZE_UNKNOWN); } } size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) { - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_ps_disable; + noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_ps_enable; + rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + } } static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) @@ -1480,20 +1637,27 @@ typedef enum { ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; + static size_t ZSTD_reset_matchState(ZSTD_matchState_t* ms, ZSTD_cwksp* ws, const ZSTD_compressionParameters* cParams, + const ZSTD_paramSwitch_e useRowMatchFinder, const ZSTD_compResetPolicy_e crp, const ZSTD_indexResetPolicy_e forceResetIndex, const ZSTD_resetTarget_e forWho) { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + /* disable chain table allocation for fast or row-based strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, + ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict)) + ? ((size_t)1 << cParams->chainLog) + : 0; size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0; DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + assert(useRowMatchFinder != ZSTD_ps_auto); if (forceResetIndex == ZSTDirp_reset) { ZSTD_window_init(&ms->window); ZSTD_cwksp_mark_tables_dirty(ws); @@ -1532,11 +1696,23 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); } + if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { + { /* Row match finder needs an additional table of hashes ("tags") */ + size_t const tagTableSize = hSize*sizeof(U16); + ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); + if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); + } + { /* Switch to 32-entry rows if searchLog is 5 (or more) */ + U32 const rowLog = BOUNDED(4, cParams->searchLog, 6); + assert(cParams->hashLog >= rowLog); + ms->rowHashLog = cParams->hashLog - rowLog; + } + } + ms->cParams = *cParams; RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, "failed a workspace allocation in ZSTD_reset_matchState"); - return 0; } @@ -1553,61 +1729,87 @@ static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); } +/* ZSTD_dictTooBig(): + * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in + * one go generically. So we ensure that in that case we reset the tables to zero, + * so that we can load as much of the dictionary as possible. + */ +static int ZSTD_dictTooBig(size_t const loadedDictSize) +{ + return loadedDictSize > ZSTD_CHUNKSIZE_MAX; +} + /*! ZSTD_resetCCtx_internal() : - note : `params` are assumed fully validated at this stage */ + * @param loadedDictSize The size of the dictionary to be loaded + * into the context, if any. If no dictionary is used, or the + * dictionary is being attached / copied, then pass 0. + * note : `params` are assumed fully validated at this stage. + */ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - ZSTD_CCtx_params params, + ZSTD_CCtx_params const* params, U64 const pledgedSrcSize, + size_t const loadedDictSize, ZSTD_compResetPolicy_e const crp, ZSTD_buffered_policy_e const zbuff) { ZSTD_cwksp* const ws = &zc->workspace; - DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", - (U32)pledgedSrcSize, params.cParams.windowLog); - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d", + (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); zc->isFirstBlock = 1; - if (params.ldmParams.enableLdm) { + /* Set applied params early so we can modify them for LDM, + * and point params at the applied params. 
+ */ + zc->appliedParams = *params; + params = &zc->appliedParams; + + assert(params->useRowMatchFinder != ZSTD_ps_auto); + assert(params->useBlockSplitter != ZSTD_ps_auto); + assert(params->ldmParams.enableLdm != ZSTD_ps_auto); + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { /* Adjust long distance matching parameters */ - ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); - assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); - assert(params.ldmParams.hashRateLog < 32); + ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, ¶ms->cParams); + assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog); + assert(params->ldmParams.hashRateLog < 32); } - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; + U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; - size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered) + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 : 0; - size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered) + size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered) ? windowSize + blockSize : 0; - size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize); int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + int const dictTooBig = ZSTD_dictTooBig(loadedDictSize); ZSTD_indexResetPolicy_e needsIndexReset = - (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset; + (indexTooClose || dictTooBig || !zc->initialized) ? 
ZSTDirp_reset : ZSTDirp_continue; size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( - ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, + ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, buffInSize, buffOutSize, pledgedSrcSize); + int resizeWorkspace; + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); - /* Check if workspace is large enough, alloc a new one if needed */ - { + { /* Check if workspace is large enough, alloc a new one if needed */ int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); - + resizeWorkspace = workspaceTooSmall || workspaceWasteful; DEBUGLOG(4, "Need %zu B workspace", neededSpace); DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - if (workspaceTooSmall || workspaceWasteful) { + if (resizeWorkspace) { DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", ZSTD_cwksp_sizeof(ws) >> 10, neededSpace >> 10); @@ -1629,14 +1831,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); } } ZSTD_cwksp_clear(ws); /* init params */ - zc->appliedParams = params; - zc->blockState.matchState.cParams = params.cParams; + zc->blockState.matchState.cParams = params->cParams; zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; zc->consumedSrcSize = 0; zc->producedCSize = 0; @@ -1667,11 +1868,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); /* ldm bucketOffsets table */ - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { /* TODO: avoid memset? */ size_t const numBuckets = - ((size_t)1) << (params.ldmParams.hashLog - - params.ldmParams.bucketSizeLog); + ((size_t)1) << (params->ldmParams.hashLog - + params->ldmParams.bucketSizeLog); zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); } @@ -1687,32 +1888,28 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, FORWARD_IF_ERROR(ZSTD_reset_matchState( &zc->blockState.matchState, ws, - ¶ms.cParams, + ¶ms->cParams, + params->useRowMatchFinder, crp, needsIndexReset, ZSTD_resetTarget_CCtx), ""); /* ldm hash table */ - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { /* TODO: avoid memset? 
*/ - size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); zc->maxNbLdmSequences = maxNbLdmSeq; ZSTD_window_init(&zc->ldmState.window); - ZSTD_window_clear(&zc->ldmState.window); zc->ldmState.loadedDictEnd = 0; } - /* Due to alignment, when reusing a workspace, we can actually consume - * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h - */ - assert(ZSTD_cwksp_used(ws) >= neededSpace && - ZSTD_cwksp_used(ws) <= neededSpace + 3); - DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); + zc->initialized = 1; return 0; @@ -1768,6 +1965,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); { ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; @@ -1783,7 +1982,9 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, cdict->dictContentSize, ZSTD_cpm_attachDict); params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_makeClean, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); } @@ -1827,15 +2028,17 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; assert(!cdict->matchState.dedicatedDictSearch); - - DEBUGLOG(4, "copying dictionary into context"); + DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); { unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Copy only compression parameters related to tables. */ params.cParams = *cdict_cParams; params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + params.useRowMatchFinder = cdict->useRowMatchFinder; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_leaveDirty, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); @@ -1843,17 +2046,30 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, } ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + assert(params.useRowMatchFinder != ZSTD_ps_auto); /* copy tables */ - { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); + { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */) + ? 
((size_t)1 << cdict_cParams->chainLog) + : 0; size_t const hSize = (size_t)1 << cdict_cParams->hashLog; ZSTD_memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, hSize * sizeof(U32)); - ZSTD_memcpy(cctx->blockState.matchState.chainTable, + /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ + if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { + ZSTD_memcpy(cctx->blockState.matchState.chainTable, cdict->matchState.chainTable, chainSize * sizeof(U32)); + } + /* copy tag table */ + if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { + size_t const tagTableSize = hSize*sizeof(U16); + ZSTD_memcpy(cctx->blockState.matchState.tagTable, + cdict->matchState.tagTable, + tagTableSize); + } } /* Zero the hashTable3, since the cdict never fills it */ @@ -1917,16 +2133,22 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - DEBUGLOG(5, "ZSTD_copyCCtx_internal"); RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, "Can't copy a ctx that's not in init stage."); - + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); { ZSTD_CCtx_params params = dstCCtx->requestedParams; /* Copy only compression parameters related to tables. */ params.cParams = srcCCtx->appliedParams.cParams; + assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto); + assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto); + assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto); + params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; + params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter; + params.ldmParams = srcCCtx->appliedParams.ldmParams; params.fParams = fParams; - ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTD_resetCCtx_internal(dstCCtx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, ZSTDcrp_leaveDirty, zbuff); assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); @@ -1938,7 +2160,11 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); /* copy tables */ - { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy, + srcCCtx->appliedParams.useRowMatchFinder, + 0 /* forDDSDict */) + ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) + : 0; size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; int const h3log = srcCCtx->blockState.matchState.hashLog3; size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; @@ -2005,6 +2231,8 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa int const nbRows = (int)size / ZSTD_ROWSIZE; int cellNb = 0; int rowNb; + /* Protect special index values < ZSTD_WINDOW_START_INDEX. 
*/ + U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX; assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ assert(size < (1U<<31)); /* can be casted to int */ @@ -2012,12 +2240,17 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa for (rowNb=0 ; rowNb < nbRows ; rowNb++) { int column; for (column=0; columnhashTable, hSize, reducerValue); } - if (params->cParams.strategy != ZSTD_fast) { + if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) { U32 const chainSize = (U32)1 << params->cParams.chainLog; if (params->cParams.strategy == ZSTD_btlazy2) ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); @@ -2072,14 +2305,14 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) assert(nbSeq <= seqStorePtr->maxNbSeq); for (u=0; ulongLengthID==1) + if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID==2) + if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } @@ -2093,10 +2326,161 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) return (cctxParams->targetCBlockSize != 0); } -/* ZSTD_entropyCompressSequences_internal(): - * actually compresses both literals and sequences */ +/* ZSTD_blockSplitterEnabled(): + * Returns if block splitting param is being used + * If used, compression will do best effort to split a block in order to improve compression ratio. + * At the time this function is called, the parameter must be finalized. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter); + assert(cctxParams->useBlockSplitter != ZSTD_ps_auto); + return (cctxParams->useBlockSplitter == ZSTD_ps_enable); +} + +/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types + * and size of the sequences statistics + */ +typedef struct { + U32 LLtype; + U32 Offtype; + U32 MLtype; + size_t size; + size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_symbolEncodingTypeStats_t; + +/* ZSTD_buildSequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. + * Modifies `nextEntropy` to have the appropriate values as a side effect. + * nbSeq must be greater than 0. 
+ * + * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, unsigned* countWorkspace, + void* entropyWorkspace, size_t entropyWkspSize) { + BYTE* const ostart = dst; + const BYTE* const oend = dstEnd; + BYTE* op = ostart; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + ZSTD_symbolEncodingTypeStats_t stats; + + stats.lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + assert(op <= oend); + assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, + countWorkspace, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, + sizeof(prevEntropy->litlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed"); + stats.size = countSize; + return stats; + } + if (stats.LLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, + countWorkspace, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, + sizeof(prevEntropy->offcodeCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed"); + stats.size = countSize; + return stats; + } + if (stats.Offtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, + countWorkspace, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, + sizeof(prevEntropy->matchlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed"); + stats.size = countSize; + return stats; + } + if (stats.MLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + stats.size = (size_t)(op-ostart); + return stats; +} + +/* ZSTD_entropyCompressSeqStore_internal(): + * compresses both literals and sequences + * Returns compressed size of block, or a zstd error. 
+ */ +#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 MEM_STATIC size_t -ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, +ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2110,36 +2494,38 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - BYTE* seqHead; - BYTE* lastNCount = NULL; + size_t lastCountSize; entropyWorkspace = count + (MaxSeq + 1); entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); - DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); assert(entropyWkspSize >= HUF_WORKSPACE_SIZE); /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; + size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart; + size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart; + /* Base suspicion of uncompressibility on ratio of literals to sequences */ + unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); size_t const litSize = (size_t)(seqStorePtr->lit - literals); size_t const cSize = ZSTD_compressLiterals( &prevEntropy->huf, &nextEntropy->huf, cctxParams->cParams.strategy, - ZSTD_disableLiteralsCompression(cctxParams), + ZSTD_literalsCompressionIsDisabled(cctxParams), op, dstCapacity, literals, litSize, entropyWorkspace, entropyWkspSize, - bmi2); + bmi2, suspectUncompressible); FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); assert(cSize <= dstCapacity); op += cSize; @@ -2165,95 +2551,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); return (size_t)(op - ostart); } - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - assert(op <= oend); - - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, - count, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->fse.litlengthCTable, - LL_defaultNorm, LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t 
const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, - LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, - sizeof(prevEntropy->fse.litlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for Offsets */ - { unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, - count, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->fse.offcodeCTable, - OF_defaultNorm, OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, - OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, - sizeof(prevEntropy->fse.offcodeCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for MatchLengths */ - { unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, - count, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->fse.matchlengthCTable, - ML_defaultNorm, ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, - ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, - sizeof(prevEntropy->fse.matchlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + { + ZSTD_symbolEncodingTypeStats_t stats; + BYTE* seqHead = op++; + /* build stats for sequences */ + stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + &prevEntropy->fse, &nextEntropy->fse, + op, oend, + strategy, count, + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + *seqHead = (BYTE)((stats.LLtype<<6) + 
(stats.Offtype<<4) + (stats.MLtype<<2)); + lastCountSize = stats.lastCountSize; + op += stats.size; + } { size_t const bitstreamSize = ZSTD_encodeSequences( op, (size_t)(oend - op), @@ -2273,9 +2584,9 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, * In this exceedingly rare case, we will simply emit an uncompressed * block, since it isn't worth optimizing. */ - if (lastNCount && (op - lastNCount) < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(op - lastNCount == 3); + if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { + /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(lastCountSize + bitstreamSize == 3); DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " "emitting an uncompressed block."); return 0; @@ -2287,7 +2598,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, } MEM_STATIC size_t -ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, +ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2296,7 +2607,7 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, void* entropyWorkspace, size_t entropyWkspSize, int bmi2) { - size_t const cSize = ZSTD_entropyCompressSequences_internal( + size_t const cSize = ZSTD_entropyCompressSeqStore_internal( seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, entropyWorkspace, entropyWkspSize, bmi2); @@ -2306,20 +2617,20 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, */ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) return 0; /* block not compressed */ - FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); /* Check compressibility */ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); if (cSize >= maxCSize) return 0; /* block not compressed */ } - DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); return cSize; } /* ZSTD_selectBlockCompressor() : * Not static, but internal use only (used by long distance matcher) * assumption : strat is a valid strategy */ -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) { static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { { ZSTD_compressBlock_fast /* default for 0 */, @@ -2367,7 +2678,28 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); + if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { + static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { + { ZSTD_compressBlock_greedy_row, + ZSTD_compressBlock_lazy_row, + ZSTD_compressBlock_lazy2_row }, + { ZSTD_compressBlock_greedy_extDict_row, + ZSTD_compressBlock_lazy_extDict_row, + ZSTD_compressBlock_lazy2_extDict_row }, + { ZSTD_compressBlock_greedy_dictMatchState_row, + 
ZSTD_compressBlock_lazy_dictMatchState_row, + ZSTD_compressBlock_lazy2_dictMatchState_row }, + { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } + }; + DEBUGLOG(4, "Selecting a row-based matchfinder"); + assert(useRowMatchFinder != ZSTD_ps_auto); + selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; + } else { + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + } assert(selectedCompressor != NULL); return selectedCompressor; } @@ -2383,7 +2715,7 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) { ssPtr->lit = ssPtr->litStart; ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; + ssPtr->longLengthType = ZSTD_llt_none; } typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; @@ -2430,15 +2762,16 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; } if (zc->externSeqStore.pos < zc->externSeqStore.size) { - assert(!zc->appliedParams.ldmParams.enableLdm); + assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); /* Updates ldmSeqStore.pos */ lastLLSize = ZSTD_ldm_blockCompress(&zc->externSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); - } else if (zc->appliedParams.ldmParams.enableLdm) { + } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { rawSeqStore_t ldmSeqStore = kNullRawSeqStore; ldmSeqStore.seq = zc->ldmSequences; @@ -2452,10 +2785,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ZSTD_ldm_blockCompress(&ldmSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } @@ -2483,22 +2819,22 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); for (i = 0; i < seqStoreSeqSize; ++i) { - U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; outSeqs[i].litLength = seqStoreSeqs[i].litLength; - outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; outSeqs[i].rep = 0; if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { + } else if (seqStore->longLengthType == ZSTD_llt_matchLength) { outSeqs[i].matchLength += 0x10000; } } - if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { /* Derive the correct offset corresponding to a repcode */ - outSeqs[i].rep = seqStoreSeqs[i].offset; + outSeqs[i].rep = 
seqStoreSeqs[i].offBase; if (outSeqs[i].litLength != 0) { rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; } else { @@ -2512,9 +2848,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) outSeqs[i].offset = rawOffset; /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode so we provide seqStoreSeqs[i].offset - 1 */ - updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, - seqStoreSeqs[i].offset - 1, - seqStoreSeqs[i].litLength == 0); + ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offBase - 1, + seqStoreSeqs[i].litLength == 0); literalsRead += outSeqs[i].litLength; } /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. @@ -2602,16 +2938,740 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore) return nbSeqs < 4 && nbLits < 10; } -static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) +{ + ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; + bs->prevCBlock = bs->nextCBlock; + bs->nextCBlock = tmp; +} + +/* Writes the block header */ +static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); +} + +/* ZSTD_buildBlockEntropyStats_literals() : + * Builds entropy for the literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * Requires ENTROPY_WORKSPACE_SIZE workspace + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int literalsCompressionIsDisabled, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (literalsCompressionIsDisabled) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +#ifndef COMPRESS_LITERALS_SIZE_MIN +#define COMPRESS_LITERALS_SIZE_MIN 63 +#endif + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + + +/* ZSTD_buildDummySequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, + * and updates nextEntropy to the appropriate repeatMode. + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { + ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; + nextEntropy->litlength_repeatMode = FSE_repeat_none; + nextEntropy->offcode_repeatMode = FSE_repeat_none; + nextEntropy->matchlength_repeatMode = FSE_repeat_none; + return stats; +} + +/* ZSTD_buildBlockEntropyStats_sequences() : + * Builds entropy for the sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * Requires ENTROPY_WORKSPACE_SIZE wksp. 
+ * @return : size of fse tables or error code */ +static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + unsigned* countWorkspace = (unsigned*)workspace; + unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1); + size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace); + ZSTD_symbolEncodingTypeStats_t stats; + + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); + stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + prevEntropy, nextEntropy, op, oend, + strategy, countWorkspace, + entropyWorkspace, entropyWorkspaceSize) + : ZSTD_buildDummySequencesStatistics(nextEntropy); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; + fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; + fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; + fseMetadata->lastCountSize = stats.lastCountSize; + return stats.size; +} + + +/* ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. + * Requires workspace size ENTROPY_WORKSPACE_SIZE + * + * @return : 0 on success or error code + */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_literalsCompressionIsDisabled(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed"); + return 0; +} + +/* Returns the size estimate for the literals section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); + U32 singleStream = litSize < 256; + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + 
if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ +static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, size_t nbSeq, unsigned maxCode, + const FSE_CTable* fseCTable, + const U8* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + (void)defaultMax; + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { + return nbSeq * 10; + } + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits >> 3; +} + +/* Returns the size estimate for the sequences section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, + fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, + fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, + fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +/* Returns the size estimate for a given stream of literals, of, ll, ml */ +static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const 
ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return seqSize + literalsSize + ZSTD_blockHeaderSize; +} + +/* Builds entropy statistics and uses them for blocksize estimation. + * + * Returns the estimated compressed size of the seqStore, or a zstd error. + */ +static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) { + ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata; + DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()"); + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), + seqStore->ofCode, seqStore->llCode, seqStore->mlCode, + (size_t)(seqStore->sequences - seqStore->sequencesStart), + &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1); +} + +/* Returns literals bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { + size_t literalsBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { + literalsBytes += 0x10000; + } + } + return literalsBytes; +} + +/* Returns match bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { + size_t matchBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + matchBytes += seq.mlBase + MINMATCH; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { + matchBytes += 0x10000; + } + } + return matchBytes; +} + +/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). + * Stores the result in resultSeqStore. 
+ */
+static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+ const seqStore_t* originalSeqStore,
+ size_t startIdx, size_t endIdx) {
+ BYTE* const litEnd = originalSeqStore->lit;
+ size_t literalsBytes;
+ size_t literalsBytesPreceding = 0;
+
+ *resultSeqStore = *originalSeqStore;
+ if (startIdx > 0) {
+ resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
+ literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+ }
+
+ /* Move longLengthPos into the correct position if necessary */
+ if (originalSeqStore->longLengthType != ZSTD_llt_none) {
+ if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
+ resultSeqStore->longLengthType = ZSTD_llt_none;
+ } else {
+ resultSeqStore->longLengthPos -= (U32)startIdx;
+ }
+ }
+ resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
+ resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
+ literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+ resultSeqStore->litStart += literalsBytesPreceding;
+ if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
+ /* This accounts for possible last literals if the derived chunk reaches the end of the block */
+ resultSeqStore->lit = litEnd;
+ } else {
+ resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
+ }
+ resultSeqStore->llCode += startIdx;
+ resultSeqStore->mlCode += startIdx;
+ resultSeqStore->ofCode += startIdx;
+}
+
+/*
+ * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
+ * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
+ */
+static U32
+ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
+{
+ U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */
+ assert(STORED_IS_REPCODE(offCode));
+ if (adjustedOffCode == ZSTD_REP_NUM) {
+ /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
+ assert(rep[0] > 0);
+ return rep[0] - 1;
+ }
+ return rep[adjustedOffCode];
+}
+
+/*
+ * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
+ * due to emission of RLE/raw blocks that disturb the offset history,
+ * and replaces any repcodes within the seqStore that may be invalid.
+ *
+ * dRepcodes are updated as would be on the decompression side.
+ * cRepcodes are updated exactly in accordance with the seqStore.
+ *
+ * Note : this function assumes seq->offBase respects the following numbering scheme :
+ * 0 : invalid
+ * 1-3 : repcode 1-3
+ * 4+ : real_offset+3
+ */
+static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
+ seqStore_t* const seqStore, U32 const nbSeq) {
+ U32 idx = 0;
+ for (; idx < nbSeq; ++idx) {
+ seqDef* const seq = seqStore->sequencesStart + idx;
+ U32 const ll0 = (seq->litLength == 0);
+ U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
+ assert(seq->offBase > 0);
+ if (STORED_IS_REPCODE(offCode)) {
+ U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
+ U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
+ /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
+ * the repcode with the offset it actually references, determined by the compression
+ * repcode history.
+ */ + if (dRawOffset != cRawOffset) { + seq->offBase = cRawOffset + ZSTD_REP_NUM; + } + } + /* Compression repcode history is always updated with values directly from the unmodified seqStore. + * Decompression repcode history may use modified seq->offset value taken from compression repcode history. + */ + ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0); + ZSTD_updateRep(cRepcodes->rep, offCode, ll0); + } +} + +/* ZSTD_compressSeqStore_singleBlock(): + * Compresses a seqStore into a block with a block header, into the buffer dst. + * + * Returns the total size of that block (including header) or a ZSTD error code. + */ +static size_t +ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, + repcodes_t* const dRep, repcodes_t* const cRep, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock, U32 isPartition) { - ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; - zc->blockState.prevCBlock = zc->blockState.nextCBlock; - zc->blockState.nextCBlock = tmp; + const U32 rleMaxLength = 25; + BYTE* op = (BYTE*)dst; + const BYTE* ip = (const BYTE*)src; + size_t cSize; + size_t cSeqsSize; + + /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ + repcodes_t const dRepOriginal = *dRep; + DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock"); + if (isPartition) + ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit"); + cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); + + if (!zc->isFirstBlock && + cSeqsSize < rleMaxLength && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3
+ */
+ cSeqsSize = 1;
+ }
+
+ if (zc->seqCollector.collectSequences) {
+ ZSTD_copyBlockSequences(zc);
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+ return 0;
+ }
+
+ if (cSeqsSize == 0) {
+ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+ FORWARD_IF_ERROR(cSize, "Nocompress block failed");
+ DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
+ *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+ } else if (cSeqsSize == 1) {
+ cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
+ FORWARD_IF_ERROR(cSize, "RLE compress block failed");
+ DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
+ *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+ } else {
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+ writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
+ cSize = ZSTD_blockHeaderSize + cSeqsSize;
+ DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
+ }
+
+ if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+ zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+ return cSize;
+}
+
+/* Struct to keep track of where we are in our recursive calls. */
+typedef struct {
+ U32* splitLocations; /* Array of split indices */
+ size_t idx; /* The current index within splitLocations being worked on */
+} seqStoreSplits;
+
+#define MIN_SEQUENCES_BLOCK_SPLITTING 300
+
+/* Helper function to perform the recursive search for block splits.
+ * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
+ * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
+ * we do not recurse.
+ *
+ * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
+ * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
+ * In practice, recursion depth usually doesn't go beyond 4.
+ *
+ * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
+ * maximum of 128 KB, this value is actually impossible to reach.
+ */
+static void
+ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+ ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
+{
+ seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
+ seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
+ seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
+ size_t estimatedOriginalSize;
+ size_t estimatedFirstHalfSize;
+ size_t estimatedSecondHalfSize;
+ size_t midIdx = (startIdx + endIdx)/2;
+
+ if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
+ DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
+ return;
+ }
+ DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
+ ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
+ ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
+ ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
+ estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
+ estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
+ estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
+ DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
+ estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
+ if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
+ return;
+ }
+ if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
+ ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
+ splits->splitLocations[splits->idx] = (U32)midIdx;
+ splits->idx++;
+ ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
+ }
+}
+
+/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
+ *
+ * Returns the number of splits made (which equals the size of the partition table - 1).
+ */
+static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
+ seqStoreSplits splits = {partitions, 0};
+ if (nbSeq <= 4) {
+ DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
+ /* Refuse to try and split anything with less than 4 sequences */
+ return 0;
+ }
+ ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
+ splits.splitLocations[splits.idx] = nbSeq;
+ DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
+ return splits.idx;
+}
+
+/* ZSTD_compressBlock_splitBlock():
+ * Attempts to split a given block into multiple blocks to improve compression ratio.
+ *
+ * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
+ */ +static size_t +ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, + const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) +{ + size_t cSize = 0; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + size_t i = 0; + size_t srcBytesTotal = 0; + U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ + seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore; + seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore; + size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); + + /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history + * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two + * separate repcode histories that simulate repcode history on compression and decompression side, + * and use the histories to determine whether we must replace a particular repcode with its raw offset. + * + * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed + * or RLE. This allows us to retrieve the offset value that an invalid repcode references within + * a nocompress/RLE block. + * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use + * the replacement offset value rather than the original repcode to update the repcode history. + * dRep also will be the final repcode history sent to the next block. + * + * See ZSTD_seqStore_resolveOffCodes() for more details. + */ + repcodes_t dRep; + repcodes_t cRep; + ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); + + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + if (numSplits == 0) { + size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, + &dRep, &cRep, + op, dstCapacity, + ip, blockSize, + lastBlock, 0 /* isPartition */); + FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); + assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + return cSizeSingleBlock; + } + + ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]); + for (i = 0; i <= numSplits; ++i) { + size_t srcBytes; + size_t cSizeChunk; + U32 const lastPartition = (i == numSplits); + U32 lastBlockEntireSrc = 0; + + srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); + srcBytesTotal += srcBytes; + if (lastPartition) { + /* This is the final partition, need to account for possible last literals */ + srcBytes += blockSize - srcBytesTotal; + lastBlockEntireSrc = lastBlock; + } else { + ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]); + } + + cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore, + &dRep, &cRep, + op, dstCapacity, + ip, srcBytes, + lastBlockEntireSrc, 1 /* isPartition */); + DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); + FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); + + ip += srcBytes; + op += cSizeChunk; + 
dstCapacity -= cSizeChunk; + cSize += cSizeChunk; + *currSeqStore = *nextSeqStore; + assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + } + /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes + * for the next block. + */ + ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); + return cSize; +} + +static size_t +ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 lastBlock) +{ + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + U32 nbSeq; + size_t cSize; + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); + assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); + return cSize; + } + nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); + } + + cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); + FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); + return cSize; } -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) +static size_t +ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) { /* This the upper bound for the length of an rle block. * This isn't the actual upper bound. Finding the real threshold @@ -2632,12 +3692,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, if (zc->seqCollector.collectSequences) { ZSTD_copyBlockSequences(zc); - ZSTD_confirmRepcodesAndEntropyTables(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); return 0; } /* encode sequences and literals */ - cSize = ZSTD_entropyCompressSequences(&zc->seqStore, + cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, dst, dstCapacity, @@ -2645,12 +3705,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); - if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); - return 0; - } - - if (frame && /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." @@ -2666,7 +3720,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, out: if (!ZSTD_isError(cSize) && cSize > 1) { - ZSTD_confirmRepcodesAndEntropyTables(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); } /* We check that dictionaries have offset codes available for the first * block. 
After the first block, the offcode table might not have large @@ -2719,7 +3773,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { - ZSTD_confirmRepcodesAndEntropyTables(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); return cSize; } } @@ -2759,9 +3813,9 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, void const* ip, void const* iend) { - if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { - U32 const maxDist = (U32)1 << params->cParams.windowLog; - U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const maxDist = (U32)1 << params->cParams.windowLog; + if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) { U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); @@ -2784,7 +3838,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, * Frame is supposed already started (header already produced) * @return : compressed size, or an error code */ -static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, +static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastFrameChunk) @@ -2814,6 +3868,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, ZSTD_overflowCorrectIfNeeded( ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; @@ -2824,6 +3879,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); assert(cSize > 0); assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed"); + assert(cSize > 0 || cctx->seqCollector.collectSequences == 1); } else { cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, @@ -2946,7 +4005,7 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe { RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, "wrong cctx stage"); - RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, + RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable, parameter_unsupported, "incompatible with ldm"); cctx->externSeqStore.seq = seq; @@ -2983,11 +4042,12 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (!srcSize) return fhSize; /* do not generate an empty block if no input */ - if (!ZSTD_window_update(&ms->window, src, srcSize)) { + if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) { + ms->forceNonContiguous = 0; 
ms->nextToUpdate = ms->window.dictLimit; } - if (cctx->appliedParams.ldmParams.enableLdm) { - ZSTD_window_update(&cctx->ldmState.window, src, srcSize); + if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { + ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0); } if (!frame) { @@ -3055,63 +4115,86 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, { const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; + int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL; - ZSTD_window_update(&ms->window, src, srcSize); + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); + + if (srcSize > ZSTD_CHUNKSIZE_MAX) { + /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. + * Dictionaries right at the edge will immediately trigger overflow + * correction, but I don't want to insert extra constraints here. + */ + U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; + /* We must have cleared our windows when our source is this large. */ + assert(ZSTD_window_isEmpty(ms->window)); + if (loadLdmDict) + assert(ZSTD_window_isEmpty(ls->window)); + /* If the dictionary is too large, only load the suffix of the dictionary. */ + if (srcSize > maxDictSize) { + ip = iend - maxDictSize; + src = ip; + srcSize = maxDictSize; + } + } + + DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); + ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + ms->forceNonContiguous = params->deterministicRefPrefix; - if (params->ldmParams.enableLdm && ls != NULL) { - ZSTD_window_update(&ls->window, src, srcSize); + if (loadLdmDict) { + ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base); } - /* Assert that we the ms params match the params we're being given */ - ZSTD_assertEqualCParams(params->cParams, ms->cParams); - if (srcSize <= HASH_READ_SIZE) return 0; - while (iend - ip > HASH_READ_SIZE) { - size_t const remaining = (size_t)(iend - ip); - size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); - const BYTE* const ichunk = ip + chunk; - - ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); - if (params->ldmParams.enableLdm && ls != NULL) - ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); + if (loadLdmDict) + ZSTD_ldm_fillHashTable(ls, ip, iend, ¶ms->ldmParams); - switch(params->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, ichunk, dtlm); - break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); - break; + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, dtlm); + break; - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { - assert(chunk == remaining); /* must load everything in one go */ - ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); - } else if (chunk >= HASH_READ_SIZE) { - ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + assert(srcSize >= HASH_READ_SIZE); + if (ms->dedicatedDictSearch) { + assert(ms->chainTable != NULL); + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE); + } else { + assert(params->useRowMatchFinder != ZSTD_ps_auto); + if (params->useRowMatchFinder == ZSTD_ps_enable) { + size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); + ZSTD_memset(ms->tagTable, 0, tagTableSize); + ZSTD_row_update(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using row-based hash table for lazy dict"); + } else { + ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using chain-based hash table for lazy dict"); } - break; - - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - if (chunk >= HASH_READ_SIZE) - ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); - break; - - default: - assert(0); /* not possible : not a valid strategy id */ } + break; + + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + assert(srcSize >= HASH_READ_SIZE); + ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); + break; - ip = ichunk; + default: + assert(0); /* not possible : not a valid strategy id */ } ms->nextToUpdate = (U32)(iend - ms->window.base); @@ -3250,7 +4333,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, const BYTE* const dictEnd = dictPtr + dictSize; size_t dictID; size_t eSize; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); @@ -3321,6 +4403,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + size_t const dictContentSize = cdict ? 
cdict->dictContentSize : dictSize; DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); @@ -3335,7 +4418,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } - FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + dictContentSize, ZSTDcrp_makeClean, zbuff) , ""); { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( @@ -3350,7 +4434,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; - cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize; + cctx->dictContentSize = dictContentSize; } return 0; } @@ -3485,15 +4569,14 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, const void* dict,size_t dictSize, ZSTD_parameters params) { - ZSTD_CCtx_params cctxParams; DEBUGLOG(4, "ZSTD_compress_advanced"); FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); - ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, ZSTD_NO_CLEVEL); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, - &cctxParams); + &cctx->simpleApiParams); } /* Internal */ @@ -3517,14 +4600,13 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_CCtx_params cctxParams; { ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); assert(params.fParams.contentSizeFlag == 1); - ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); } DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams); } size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, @@ -3561,7 +4643,10 @@ size_t ZSTD_estimateCDictSize_advanced( DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small + * in case we are using DDS with row-hash. */ + + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams), + /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); } @@ -3592,9 +4677,6 @@ static size_t ZSTD_initCDict_internal( assert(!ZSTD_checkCParams(params.cParams)); cdict->matchState.cParams = params.cParams; cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; - if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) { - cdict->matchState.dedicatedDictSearch = 0; - } if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictContent = dictBuffer; } else { @@ -3615,6 +4697,7 @@ static size_t ZSTD_initCDict_internal( &cdict->matchState, &cdict->workspace, ¶ms.cParams, + params.useRowMatchFinder, ZSTDcrp_makeClean, ZSTDirp_reset, ZSTD_resetTarget_CDict), ""); @@ -3638,14 +4721,17 @@ static size_t ZSTD_initCDict_internal( static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_compressionParameters cParams, ZSTD_customMem customMem) + ZSTD_compressionParameters cParams, + ZSTD_paramSwitch_e useRowMatchFinder, + U32 enableDedicatedDictSearch, + ZSTD_customMem customMem) { if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; { size_t const workspaceSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + - ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); @@ -3664,7 +4750,7 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ - + cdict->useRowMatchFinder = useRowMatchFinder; return cdict; } } @@ -3686,7 +4772,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, &cctxParams, customMem); } -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( +ZSTD_CDict* ZSTD_createCDict_advanced2( const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, @@ -3716,10 +4802,13 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); } + DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch); cctxParams.cParams = cParams; + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); cdict = ZSTD_createCDict_advanced_internal(dictSize, dictLoadMethod, cctxParams.cParams, + cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, customMem); if (ZSTD_isError( ZSTD_initCDict_internal(cdict, @@ -3788,7 +4877,9 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams) { - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams); + /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + 
(dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) @@ -3813,6 +4904,8 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_CCtxParams_init(¶ms, 0); params.cParams = cParams; + params.useRowMatchFinder = useRowMatchFinder; + cdict->useRowMatchFinder = useRowMatchFinder; if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, @@ -3839,15 +4932,15 @@ unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) return cdict->dictID; } - -/* ZSTD_compressBegin_usingCDict_advanced() : - * cdict must be != NULL */ -size_t ZSTD_compressBegin_usingCDict_advanced( +/* ZSTD_compressBegin_usingCDict_internal() : + * Implementation of various ZSTD_compressBegin_usingCDict* functions. + */ +static size_t ZSTD_compressBegin_usingCDict_internal( ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { ZSTD_CCtx_params cctxParams; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); /* Initialize the cctxParams from the cdict */ { @@ -3879,25 +4972,48 @@ size_t ZSTD_compressBegin_usingCDict_advanced( ZSTDb_not_buffered); } + +/* ZSTD_compressBegin_usingCDict_advanced() : + * This function is DEPRECATED. + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize); +} + /* ZSTD_compressBegin_usingCDict() : - * pledgedSrcSize=0 means "unknown" - * if pledgedSrcSize>0, it will enable contentSizeFlag */ + * cdict must be != NULL */ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); - return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); } -size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, +/*! ZSTD_compress_usingCDict_internal(): + * Implementation of various ZSTD_compress_usingCDict* functions. + */ +static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) { - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } +/*! ZSTD_compress_usingCDict_advanced(): + * This function is DEPRECATED. + */ +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + /*! ZSTD_compress_usingCDict() : * Compression using a digested Dictionary. * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
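As background for the CDict paths being refactored above, the following minimal sketch shows how a digested dictionary is typically driven through the upstream zstd public API. It is illustrative only: it assumes the userspace entry points ZSTD_createCDict(), ZSTD_createCCtx(), ZSTD_compress_usingCDict(), ZSTD_isError() and the matching free functions, compression level 3 and the return-0-on-failure convention are arbitrary choices for the example, and in-kernel callers reach the equivalent functionality through the zstd_* wrapper API in include/linux/zstd.h rather than these symbols.

#include <stddef.h>
#include <zstd.h>

/* Illustrative only: compress one buffer with a pre-digested dictionary.
 * The CDict is built once and can be reused for many frames, which is
 * what makes it cheaper than reloading a raw dictionary on every call. */
static size_t compress_with_cdict(const void* dict, size_t dictSize,
                                  const void* src, size_t srcSize,
                                  void* dst, size_t dstCapacity)
{
    ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, 3 /* level */);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t cSize = 0;

    if (cdict != NULL && cctx != NULL) {
        cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                                         src, srcSize, cdict);
        if (ZSTD_isError(cSize))
            cSize = 0;  /* this sketch reports failure as 0 */
    }
    ZSTD_freeCCtx(cctx);   /* both free functions accept NULL */
    ZSTD_freeCDict(cdict);
    return cSize;
}

A caller would normally size dst with ZSTD_compressBound(srcSize) before invoking the helper.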
@@ -3909,7 +5025,7 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); } @@ -4313,8 +5429,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ - if (cctx->cdict) - params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */ + if (cctx->cdict && !cctx->localDict.cdict) { + /* Let the cdict's compression level take priority over the requested params. + * But do not take the cdict's compression level if the "cdict" is actually a localDict + * generated from ZSTD_initLocalDict(). + */ + params.compressionLevel = cctx->cdict->compressionLevel; + } DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ { @@ -4327,11 +5448,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, dictSize, mode); } - if (ZSTD_CParams_shouldEnableLdm(¶ms.cParams)) { - /* Enable LDM by default for optimal parser and window size >= 128MB */ - DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); - params.ldmParams.enableLdm = 1; - } + params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams); + params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams); + params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams); { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); @@ -4436,39 +5555,39 @@ typedef struct { size_t posInSrc; /* Number of bytes given by sequences provided so far */ } ZSTD_sequencePosition; -/* Returns a ZSTD error code if sequence is not valid */ -static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, - size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { - size_t offsetBound; - U32 windowSize = 1 << windowLog; +/* ZSTD_validateSequence() : + * @offCode : is presumed to follow format required by ZSTD_storeSeq() + * @returns a ZSTD error code if sequence is not valid + */ +static size_t +ZSTD_validateSequence(U32 offCode, U32 matchLength, + size_t posInSrc, U32 windowLog, size_t dictSize) +{ + U32 const windowSize = 1 << windowLog; /* posInSrc represents the amount of data the decoder would decode up to this point. * As long as the amount of data decoded is less than or equal to window size, offsets may be * larger than the total length of output decoded in order to reference the dict, even larger than * window size. After output surpasses windowSize, we're limited to windowSize offsets again. */ - offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; - RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); - RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + size_t const offsetBound = posInSrc > windowSize ? 
(size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small"); return 0; } /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ -static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { - U32 offCode = rawOffset + ZSTD_REP_MOVE; - U32 repCode = 0; +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) +{ + U32 offCode = STORE_OFFSET(rawOffset); if (!ll0 && rawOffset == rep[0]) { - repCode = 1; + offCode = STORE_REPCODE_1; } else if (rawOffset == rep[1]) { - repCode = 2 - ll0; + offCode = STORE_REPCODE(2 - ll0); } else if (rawOffset == rep[2]) { - repCode = 3 - ll0; + offCode = STORE_REPCODE(3 - ll0); } else if (ll0 && rawOffset == rep[0] - 1) { - repCode = 3; - } - if (repCode) { - /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ - offCode = repCode - 1; + offCode = STORE_REPCODE_3; } return offCode; } @@ -4476,18 +5595,17 @@ static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. */ -static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, - const void* src, size_t blockSize) { +static size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) +{ U32 idx = seqPos->idx; BYTE const* ip = (BYTE const*)(src); const BYTE* const iend = ip + blockSize; repcodes_t updatedRepcodes; U32 dictSize; - U32 litLength; - U32 matchLength; - U32 ll0; - U32 offCode; if (cctx->cdict) { dictSize = (U32)cctx->cdict->dictContentSize; @@ -4498,23 +5616,22 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS } ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { - litLength = inSeqs[idx].litLength; - matchLength = inSeqs[idx].matchLength; - ll0 = litLength == 0; - offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); - updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + U32 const litLength = inSeqs[idx].litLength; + U32 const ll0 = (litLength == 0); + U32 const matchLength = inSeqs[idx].matchLength; + U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); if (cctx->appliedParams.validateSequences) { seqPos->posInSrc += litLength + matchLength; FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, - cctx->appliedParams.cParams.minMatch), + cctx->appliedParams.cParams.windowLog, dictSize), "Sequence validation failed"); } RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); - ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); ip += matchLength + litLength; } ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); @@ -4541,9 +5658,11 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS * avoid splitting a match, or to avoid splitting a match such that it would produce a match * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. */ -static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, - const void* src, size_t blockSize) { +static size_t +ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) +{ U32 idx = seqPos->idx; U32 startPosInSequence = seqPos->posInSequence; U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; @@ -4553,10 +5672,6 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq repcodes_t updatedRepcodes; U32 bytesAdjustment = 0; U32 finalMatchSplit = 0; - U32 litLength; - U32 matchLength; - U32 rawOffset; - U32 offCode; if (cctx->cdict) { dictSize = cctx->cdict->dictContentSize; @@ -4570,9 +5685,10 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { const ZSTD_Sequence currSeq = inSeqs[idx]; - litLength = currSeq.litLength; - matchLength = currSeq.matchLength; - rawOffset = currSeq.offset; + U32 litLength = currSeq.litLength; + U32 matchLength = currSeq.matchLength; + U32 const rawOffset = currSeq.offset; + U32 offCode; /* Modify the sequence depending on where endPosInSequence lies */ if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { @@ -4625,22 +5741,21 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq } } /* Check if this offset can be represented with a repcode */ - { U32 ll0 = (litLength == 0); + { U32 const ll0 = (litLength == 0); offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); - updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); } if (cctx->appliedParams.validateSequences) { seqPos->posInSrc += litLength + matchLength; FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, - cctx->appliedParams.cParams.minMatch), + cctx->appliedParams.cParams.windowLog, dictSize), "Sequence validation failed"); } DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); - ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); ip += matchLength + litLength; } DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); @@ -4665,7 +5780,8 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const void* src, size_t blockSize); -static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) +{ ZSTD_sequenceCopier sequenceCopier = NULL; assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); if (mode == ZSTD_sf_explicitBlockDelimiters) { @@ -4679,12 +5795,15 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) /* Compress, block-by-block, all of the sequences given. * - * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. + * Returns the cumulative size of all compressed blocks (including their headers), + * otherwise a ZSTD error. */ -static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize) { +static size_t +ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) +{ size_t cSize = 0; U32 lastBlock; size_t blockSize; @@ -4694,7 +5813,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, BYTE const* ip = (BYTE const*)src; BYTE* op = (BYTE*)dst; - ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); /* Special case: empty frame */ @@ -4732,7 +5851,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, continue; } - compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore, + compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, &cctx->appliedParams, op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, @@ -4764,7 +5883,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, } else { U32 cBlockHeader; /* Error checking and repcodes update */ - ZSTD_confirmRepcodesAndEntropyTables(cctx); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState); if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; @@ -4794,7 +5913,8 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize) { + const void* src, size_t srcSize) +{ BYTE* op = (BYTE*)dst; size_t cSize = 0; size_t compressedBlocksSize = 0; @@ -4861,117 +5981,11 @@ size_t 
ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) /*-===== Pre-defined compression levels =====-*/ +#include "clevels.h" -#define ZSTD_MAX_CLEVEL 22 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } - -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { -{ /* "default" - for any srcSize > 256 KB */ - /* W, C, H, S, L, TL, strat */ - { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ - { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ - { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ - { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ - { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ - { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ - { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ - { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ - { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ - { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ - { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ - { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ - { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ - { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ - { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ -}, -{ /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ - { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ - { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ - { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ - { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ - { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ - { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ - { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ - { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ - { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ - { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ - { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 17, 17, 3, 4, 8, 
ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ - { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ - { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ - { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ - { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ - { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ - { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ - { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ - { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ - { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ - { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ - { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ - { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ - { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ - { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ - { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ - { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -}; +int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; } static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) { @@ -4999,7 +6013,7 @@ static int ZSTD_dedicatedDictSearch_isSupported( { return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2) - && (cParams->hashLog >= cParams->chainLog) + && (cParams->hashLog > cParams->chainLog) && (cParams->chainLog <= 24); } @@ -5018,6 +6032,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams( case ZSTD_lazy: case ZSTD_lazy2: cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + if (cParams->hashLog < ZSTD_HASHLOG_MIN) { + cParams->hashLog = ZSTD_HASHLOG_MIN; + } break; case ZSTD_btlazy2: case ZSTD_btopt: @@ -5066,6 +6083,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, else row = compressionLevel; { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy); /* acceleration factor */ if (compressionLevel < 0) { int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h index 685d2f996cc2a..71697a11ae305 
100644 --- a/lib/zstd/compress/zstd_compress_internal.h +++ b/lib/zstd/compress/zstd_compress_internal.h @@ -57,7 +57,7 @@ typedef struct { } ZSTD_localDict; typedef struct { - HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)]; HUF_repeat repeatMode; } ZSTD_hufCTables_t; @@ -75,8 +75,55 @@ typedef struct { ZSTD_fseCTables_t fse; } ZSTD_entropyCTables_t; +/* ********************************************* +* Entropy buffer statistics structs and funcs * +***********************************************/ +/* ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ typedef struct { - U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/* ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + +/* ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. 
+ * @return : 0 on success or error code */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); + +/* ******************************* +* Compression internals structs * +*********************************/ + +typedef struct { + U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */ U32 len; /* Raw length of match */ } ZSTD_match_t; @@ -126,7 +173,7 @@ typedef struct { U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ - ZSTD_literalCompressionMode_e literalCompressionMode; + ZSTD_paramSwitch_e literalCompressionMode; } optState_t; typedef struct { @@ -135,14 +182,23 @@ typedef struct { } ZSTD_compressedBlockState_t; typedef struct { - BYTE const* nextSrc; /* next block here to continue on current prefix */ - BYTE const* base; /* All regular indexes relative to this position */ - BYTE const* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more valid data */ + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ + U32 nbOverflowCorrections; /* Number of times overflow correction has run since + * ZSTD_window_init(). Useful for debugging coredumps + * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. + */ } ZSTD_window_t; +#define ZSTD_WINDOW_START_INDEX 2 + typedef struct ZSTD_matchState_t ZSTD_matchState_t; + +#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ + struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ U32 loadedDictEnd; /* index of end of dictionary, within context's referential. @@ -154,9 +210,17 @@ struct ZSTD_matchState_t { */ U32 nextToUpdate; /* index from which to continue table update */ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + + U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ + U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ + U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ + U32* hashTable; U32* hashTable3; U32* chainTable; + + U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ + int dedicatedDictSearch; /* Indicates whether this matchState is using the * dedicated dictionary search structure. */ @@ -196,7 +260,7 @@ typedef struct { } ldmState_t; typedef struct { - U32 enableLdm; /* 1 if enable long distance matching */ + ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. 
ZSTD_ps_auto by default */ U32 hashLog; /* Log size of hashTable */ U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ U32 minMatchLength; /* Minimum match length */ @@ -227,7 +291,7 @@ struct ZSTD_CCtx_params_s { * There is no guarantee that hint is close to actual source size */ ZSTD_dictAttachPref_e attachDictPref; - ZSTD_literalCompressionMode_e literalCompressionMode; + ZSTD_paramSwitch_e literalCompressionMode; /* Multithreading: used to pass parameters to mtctx */ int nbWorkers; @@ -249,6 +313,15 @@ struct ZSTD_CCtx_params_s { ZSTD_sequenceFormat_e blockDelimiters; int validateSequences; + /* Block splitting */ + ZSTD_paramSwitch_e useBlockSplitter; + + /* Param for deciding whether to use row-based matchfinder */ + ZSTD_paramSwitch_e useRowMatchFinder; + + /* Always load a dictionary in ext-dict mode (not prefix mode)? */ + int deterministicRefPrefix; + /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ @@ -266,12 +339,29 @@ typedef enum { ZSTDb_buffered } ZSTD_buffered_policy_e; +/* + * Struct that contains all elements of block splitter that should be allocated + * in a wksp. + */ +#define ZSTD_MAX_NB_BLOCK_SPLITS 196 +typedef struct { + seqStore_t fullSeqStoreChunk; + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + seqStore_t currSeqStore; + seqStore_t nextSeqStore; + + U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; + ZSTD_entropyCTablesMetadata_t entropyMetadata; +} ZSTD_blockSplitCtx; + struct ZSTD_CCtx_s { ZSTD_compressionStage_e stage; int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ ZSTD_CCtx_params requestedParams; ZSTD_CCtx_params appliedParams; + ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */ U32 dictID; size_t dictContentSize; @@ -296,7 +386,7 @@ struct ZSTD_CCtx_s { ZSTD_blockState_t blockState; U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ - /* Wether we are streaming or not */ + /* Whether we are streaming or not */ ZSTD_buffered_policy_e bufferedPolicy; /* streaming */ @@ -324,6 +414,9 @@ struct ZSTD_CCtx_s { /* Multi-threading */ /* Tracing */ + + /* Workspace for block splitter */ + ZSTD_blockSplitCtx blockSplitCtx; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; @@ -358,7 +451,7 @@ typedef enum { typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) @@ -392,31 +485,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) return (mlBase > 127) ? 
ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; } -typedef struct repcodes_s { - U32 rep[3]; -} repcodes_t; - -MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) -{ - repcodes_t newReps; - if (offset >= ZSTD_REP_NUM) { /* full offset */ - newReps.rep[2] = rep[1]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = offset - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offset + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = currentOffset; - } else { /* repCode == 0 */ - ZSTD_memcpy(&newReps, rep, sizeof(newReps)); - } - } - return newReps; -} - /* ZSTD_cParam_withinBounds: * @return 1 if value is within cParam bounds, * 0 otherwise */ @@ -465,17 +533,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) return (srcSize >> minlog) + 2; } -MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams) { switch (cctxParams->literalCompressionMode) { - case ZSTD_lcm_huffman: + case ZSTD_ps_enable: return 0; - case ZSTD_lcm_uncompressed: + case ZSTD_ps_disable: return 1; default: assert(0 /* impossible: pre-validated */); ZSTD_FALLTHROUGH; - case ZSTD_lcm_auto: + case ZSTD_ps_auto: return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); } } @@ -485,7 +553,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single * large copies. */ -static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { +static void +ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) +{ assert(iend > ilimit_w); if (ip <= ilimit_w) { ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); @@ -495,14 +565,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie while (ip < iend) *op++ = *ip++; } +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +#define STORE_REPCODE_1 STORE_REPCODE(1) +#define STORE_REPCODE_2 STORE_REPCODE(2) +#define STORE_REPCODE_3 STORE_REPCODE(3) +#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) +#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) +#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) +#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) +#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) +#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ +#define STORED_TO_OFFBASE(o) ((o)+1) +#define OFFBASE_TO_STORED(o) ((o)-1) + /*! ZSTD_storeSeq() : - * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. - * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). - * `mlBase` : matchLength - MINMATCH + * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. + * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). + * @matchLength : must be >= MINMATCH * Allowed to overread literals up to litLimit. 
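 */
/* [Editorial aside, not part of the patch] A minimal self-contained sketch of
 * the sum-type offset representation introduced above, and of how the
 * three-entry repcode history reacts to it (mirroring the ZSTD_updateRep()
 * helper added a little further down). All ex_/EX_ names are hypothetical
 * restatements for illustration only, assuming ZSTD_REP_NUM == 3 and
 * therefore ZSTD_REP_MOVE == 2. */
#include <assert.h>

#define EX_REP_MOVE         2u
#define EX_STORE_REPCODE(r) ((unsigned)(r) - 1u)           /* r in 1..3 -> 0..2  */
#define EX_STORE_OFFSET(o)  ((unsigned)(o) + EX_REP_MOVE)  /* o > 0     -> o + 2 */
#define EX_IS_OFFSET(s)     ((s) > EX_REP_MOVE)
#define EX_OFFSET(s)        ((s) - EX_REP_MOVE)
#define EX_REPCODE(s)       ((s) + 1u)

static void ex_offcode_roundtrip(void)
{
    unsigned const storedRep = EX_STORE_REPCODE(2);   /* repcode 2  -> stored 1   */
    unsigned const storedOff = EX_STORE_OFFSET(100);  /* offset 100 -> stored 102 */

    assert(!EX_IS_OFFSET(storedRep) && EX_REPCODE(storedRep) == 2);
    assert( EX_IS_OFFSET(storedOff) && EX_OFFSET(storedOff) == 100);

    /* ZSTD_storeSeq() records STORED_TO_OFFBASE(stored) == stored + 1 in
     * sequences[].offBase, so repcodes occupy offBase 1..3 and real offsets
     * start at offBase 4. */
    assert(storedRep + 1 == 2);
    assert(storedOff + 1 == 103);
}

/* Same update rule as ZSTD_updateRep(), restated locally so the history
 * example below compiles on its own. */
static void ex_update_rep(unsigned rep[3], unsigned offBase_minus1, unsigned ll0)
{
    if (EX_IS_OFFSET(offBase_minus1)) {                 /* full offset */
        rep[2] = rep[1]; rep[1] = rep[0];
        rep[0] = EX_OFFSET(offBase_minus1);
    } else {                                            /* repcode */
        unsigned const repCode = offBase_minus1 + ll0;
        if (repCode > 0) {
            unsigned const cur = (repCode == 3) ? (rep[0] - 1) : rep[repCode];
            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            rep[1] = rep[0];
            rep[0] = cur;
        }   /* repCode == 0 : history unchanged */
    }
}

static void ex_rep_history(void)
{
    unsigned rep[3] = { 4, 8, 12 };

    ex_update_rep(rep, EX_STORE_OFFSET(100), 0);  /* new raw offset 100        */
    assert(rep[0] == 100 && rep[1] == 4 && rep[2] == 8);

    ex_update_rep(rep, EX_STORE_REPCODE(2), 0);   /* repcode 2, litLength != 0 */
    assert(rep[0] == 4 && rep[1] == 100 && rep[2] == 8);

    ex_update_rep(rep, EX_STORE_REPCODE(1), 0);   /* repcode 1, litLength != 0 */
    assert(rep[0] == 4 && rep[1] == 100 && rep[2] == 8);  /* history unchanged */
}
/* [End of editorial aside] The patch's ZSTD_storeSeq() documentation closes below.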
*/ -HINT_INLINE UNUSED_ATTR -void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +HINT_INLINE UNUSED_ATTR void +ZSTD_storeSeq(seqStore_t* seqStorePtr, + size_t litLength, const BYTE* literals, const BYTE* litLimit, + U32 offBase_minus1, + size_t matchLength) { BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; BYTE const* const litEnd = literals + litLength; @@ -511,7 +597,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ { U32 const pos = (U32)((const BYTE*)literals - g_start); DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); + pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); } #endif assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); @@ -535,26 +621,66 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera /* literal Length */ if (litLength>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 1; + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_literalLength; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } seqStorePtr->sequences[0].litLength = (U16)litLength; /* match offset */ - seqStorePtr->sequences[0].offset = offCode + 1; + seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); /* match Length */ - if (mlBase>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 2; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + assert(matchLength >= MINMATCH); + { size_t const mlBase = matchLength - MINMATCH; + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_matchLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].mlBase = (U16)mlBase; } - seqStorePtr->sequences[0].matchLength = (U16)mlBase; seqStorePtr->sequences++; } +/* ZSTD_updateRep() : + * updates in-place @rep (array of repeat offsets) + * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() + */ +MEM_STATIC void +ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) +{ + if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = STORED_OFFSET(offBase_minus1); + } else { /* repcode */ + U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + rep[2] = (repCode >= 2) ? 
rep[1] : rep[2]; + rep[1] = rep[0]; + rep[0] = currentOffset; + } else { /* repCode == 0 */ + /* nothing to do */ + } + } +} + +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t +ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) +{ + repcodes_t newReps; + ZSTD_memcpy(&newReps, rep, sizeof(newReps)); + ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); + return newReps; +} + /*-************************************* * Match length counter @@ -778,6 +904,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) window->dictLimit = end; } +MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) +{ + return window.dictLimit == ZSTD_WINDOW_START_INDEX && + window.lowLimit == ZSTD_WINDOW_START_INDEX && + (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX; +} + /* * ZSTD_window_hasExtDict(): * Returns non-zero if the window has a non-empty extDict. @@ -801,15 +934,71 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) ZSTD_noDict; } +/* Defining this macro to non-zero tells zstd to run the overflow correction + * code much more frequently. This is very inefficient, and should only be + * used for tests and fuzzers. + */ +#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY +# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 +# else +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0 +# endif +#endif + +/* + * ZSTD_window_canOverflowCorrect(): + * Returns non-zero if the indices are large enough for overflow correction + * to work correctly without impacting compression ratio. + */ +MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src) +{ + U32 const cycleSize = 1u << cycleLog; + U32 const curr = (U32)((BYTE const*)src - window.base); + U32 const minIndexToOverflowCorrect = cycleSize + + MAX(maxDist, cycleSize) + + ZSTD_WINDOW_START_INDEX; + + /* Adjust the min index to backoff the overflow correction frequency, + * so we don't waste too much CPU in overflow correction. If this + * computation overflows we don't really care, we just need to make + * sure it is at least minIndexToOverflowCorrect. + */ + U32 const adjustment = window.nbOverflowCorrections + 1; + U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment, + minIndexToOverflowCorrect); + U32 const indexLargeEnough = curr > adjustedIndex; + + /* Only overflow correct early if the dictionary is invalidated already, + * so we don't hurt compression ratio. + */ + U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd; + + return indexLargeEnough && dictionaryInvalidated; +} + /* * ZSTD_window_needOverflowCorrection(): * Returns non-zero if the indices are getting too large and need overflow * protection. */ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src, void const* srcEnd) { U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { + return 1; + } + } return curr > ZSTD_CURRENT_MAX; } @@ -821,7 +1010,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, * * The least significant cycleLog bits of the indices must remain the same, * which may be 0. Every index up to maxDist in the past must be valid. 
- * NOTE: (maxDist & cycleMask) must be zero. */ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, U32 maxDist, void const* src) @@ -845,32 +1033,52 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); - U32 const currentCycle0 = curr & cycleMask; - /* Exclude zero so that newCurrent - maxDist >= 1. */ - U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; - U32 const newCurrent = currentCycle1 + maxDist; + U32 const currentCycle = curr & cycleMask; + /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */ + U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX + ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) + : 0; + U32 const newCurrent = currentCycle + + currentCycleCorrection + + MAX(maxDist, cycleSize); U32 const correction = curr - newCurrent; - assert((maxDist & cycleMask) == 0); + /* maxDist must be a power of two so that: + * (newCurrent & cycleMask) == (curr & cycleMask) + * This is required to not corrupt the chains / binary tree. + */ + assert((maxDist & (maxDist - 1)) == 0); + assert((curr & cycleMask) == (newCurrent & cycleMask)); assert(curr > newCurrent); - /* Loose bound, should be around 1<<29 (see above) */ - assert(correction > 1<<28); + if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + } window->base += correction; window->dictBase += correction; - if (window->lowLimit <= correction) window->lowLimit = 1; - else window->lowLimit -= correction; - if (window->dictLimit <= correction) window->dictLimit = 1; - else window->dictLimit -= correction; + if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) { + window->lowLimit = ZSTD_WINDOW_START_INDEX; + } else { + window->lowLimit -= correction; + } + if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) { + window->dictLimit = ZSTD_WINDOW_START_INDEX; + } else { + window->dictLimit -= correction; + } /* Ensure we can still reference the full window. */ assert(newCurrent >= maxDist); - assert(newCurrent - maxDist >= 1); + assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX); /* Ensure that lowLimit and dictLimit didn't underflow. 
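 */
/* [Editorial aside, not part of the patch] A worked instance of the index
 * rewrite performed by this function, with hypothetical parameters
 * cycleLog = 20 (cycleSize = 1 MB), maxDist = 1 << 22 (4 MB) and an index
 * well past the correction threshold. currentCycleCorrection is 0 here
 * because currentCycle >= ZSTD_WINDOW_START_INDEX. */
#include <assert.h>

static void ex_overflow_correction(void)
{
    unsigned const cycleSize = 1u << 20;
    unsigned const cycleMask = cycleSize - 1;
    unsigned const maxDist   = 1u << 22;

    unsigned const curr         = 0x7F001234u;
    unsigned const currentCycle = curr & cycleMask;               /* 0x01234 */
    unsigned const newCurrent   = currentCycle
                                + (maxDist > cycleSize ? maxDist : cycleSize);
    unsigned const correction   = curr - newCurrent;  /* amount subtracted from base */

    /* The low cycleLog bits are preserved, so hash chains / binary trees
     * built on masked indices stay consistent... */
    assert((newCurrent & cycleMask) == (curr & cycleMask));
    /* ...and the full window of maxDist bytes remains addressable. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= 2 /* ZSTD_WINDOW_START_INDEX */);
    (void)correction;
}
/* [End of editorial aside] The original code resumes below.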
*/ assert(window->lowLimit <= newCurrent); assert(window->dictLimit <= newCurrent); + ++window->nbOverflowCorrections; + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, window->lowLimit); return correction; @@ -975,11 +1183,13 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window, MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { ZSTD_memset(window, 0, sizeof(*window)); - window->base = (BYTE const*)""; - window->dictBase = (BYTE const*)""; - window->dictLimit = 1; /* start from 1, so that 1st position is valid */ - window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - window->nextSrc = window->base + 1; /* see issue #1241 */ + window->base = (BYTE const*)" "; + window->dictBase = (BYTE const*)" "; + ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */ + window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */ + window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */ + window->nbOverflowCorrections = 0; } /* @@ -990,7 +1200,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { * Returns non-zero if the segment is contiguous. */ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, - void const* src, size_t srcSize) + void const* src, size_t srcSize, + int forceNonContiguous) { BYTE const* const ip = (BYTE const*)src; U32 contiguous = 1; @@ -1000,7 +1211,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, assert(window->base != NULL); assert(window->dictBase != NULL); /* Check if blocks follow each other */ - if (src != window->nextSrc) { + if (src != window->nextSrc || forceNonContiguous) { /* not contiguous */ size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); @@ -1030,15 +1241,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, */ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) { - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.lowLimit; - U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't * valid for the entire block. So this check is sufficient to find the lowest valid match index. */ - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + U32 const matchLowest = isDictionary ? 
lowestValid : withinWindow; return matchLowest; } diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c index 655bcda4d1f1c..52b0a8059aba9 100644 --- a/lib/zstd/compress/zstd_compress_literals.c +++ b/lib/zstd/compress/zstd_compress_literals.c @@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, void* dst, size_t dstCapacity, const void* src, size_t srcSize, void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2) + const int bmi2, + unsigned suspectUncompressible) { size_t const minGain = ZSTD_minGain(srcSize, strategy); size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); @@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, HUF_compress1X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) : HUF_compress4X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible); if (repeat != HUF_repeat_none) { /* reused the existing table */ DEBUGLOG(5, "Reusing previous huffman table"); @@ -117,7 +118,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, } } - if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h index 9904c0cd30a0a..9775fb97cb702 100644 --- a/lib/zstd/compress/zstd_compress_literals.h +++ b/lib/zstd/compress/zstd_compress_literals.h @@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); +/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, ZSTD_hufCTables_t* nextHuf, ZSTD_strategy strategy, int disableLiteralCompression, void* dst, size_t dstCapacity, const void* src, size_t srcSize, void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2); + const int bmi2, + unsigned suspectUncompressible); #endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c index dcfcdc9cc5e8d..21ddc1b37acf8 100644 --- a/lib/zstd/compress/zstd_compress_sequences.c +++ b/lib/zstd/compress/zstd_compress_sequences.c @@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t { unsigned cost = 0; unsigned s; + + assert(total > 0); for (s = 0; s <= max; ++s) { unsigned norm = (unsigned)((256 * count[s]) / total); if (count[s] != 0 && norm == 0) @@ -273,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, assert(nbSeq_1 > 1); assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); (void)entropyWorkspaceSize; - FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, 
ZSTD_useLowProbCount(nbSeq_1)), ""); - { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed"); + assert(oend >= op); + { size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */ FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), ""); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed"); return NCountSize; } } @@ -310,19 +313,19 @@ ZSTD_encodeSequences_body( FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]); if (MEM_32bits()) BIT_flushBits(&blockStream); if (longOffsets) { U32 const ofBits = ofCodeTable[nbSeq-1]; unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits); BIT_flushBits(&blockStream); } - BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits, ofBits - extraBits); } else { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]); } BIT_flushBits(&blockStream); @@ -336,8 +339,8 @@ ZSTD_encodeSequences_body( U32 const mlBits = ML_bits[mlCode]; DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", (unsigned)sequences[n].litLength, - (unsigned)sequences[n].matchLength + MINMATCH, - (unsigned)sequences[n].offset); + (unsigned)sequences[n].mlBase + MINMATCH, + (unsigned)sequences[n].offBase); /* 32b*/ /* 64b*/ /* (7)*/ /* (7)*/ FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ @@ -348,18 +351,18 @@ ZSTD_encodeSequences_body( BIT_flushBits(&blockStream); /* (7)*/ BIT_addBits(&blockStream, sequences[n].litLength, llBits); if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + BIT_addBits(&blockStream, sequences[n].mlBase, mlBits); if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); if (longOffsets) { unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_addBits(&blockStream, sequences[n].offBase, extraBits); BIT_flushBits(&blockStream); /* (7)*/ } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + BIT_addBits(&blockStream, sequences[n].offBase >> extraBits, ofBits - extraBits); /* 31 */ } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */ } BIT_flushBits(&blockStream); /* (7)*/ DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); @@ -396,7 +399,7 @@ ZSTD_encodeSequences_default( 
#if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t ZSTD_encodeSequences_bmi2( void* dst, size_t dstCapacity, FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c index b0610b255653f..17d836cc84e8f 100644 --- a/lib/zstd/compress/zstd_compress_superblock.c +++ b/lib/zstd/compress/zstd_compress_superblock.c @@ -15,289 +15,10 @@ #include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */ #include "hist.h" /* HIST_countFast_wksp */ -#include "zstd_compress_internal.h" +#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */ #include "zstd_compress_sequences.h" #include "zstd_compress_literals.h" -/*-************************************* -* Superblock entropy buffer structs -***************************************/ -/* ZSTD_hufCTablesMetadata_t : - * Stores Literals Block Type for a super-block in hType, and - * huffman tree description in hufDesBuffer. - * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ -typedef struct { - symbolEncodingType_e hType; - BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; - size_t hufDesSize; -} ZSTD_hufCTablesMetadata_t; - -/* ZSTD_fseCTablesMetadata_t : - * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and - * fse tables in fseTablesBuffer. - * fseTablesSize refers to the size of fse tables in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ -typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; - BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ -} ZSTD_fseCTablesMetadata_t; - -typedef struct { - ZSTD_hufCTablesMetadata_t hufMetadata; - ZSTD_fseCTablesMetadata_t fseMetadata; -} ZSTD_entropyCTablesMetadata_t; - - -/* ZSTD_buildSuperBlockEntropy_literal() : - * Builds entropy for the super-block literals. - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. - * @return : size of huffman description table or error code */ -static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, - const ZSTD_hufCTables_t* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_hufCTablesMetadata_t* hufMetadata, - const int disableLiteralsCompression, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; - const size_t nodeWkspSize = wkspEnd-nodeWksp; - unsigned maxSymbolValue = 255; - unsigned huffLog = HUF_TABLELOG_DEFAULT; - HUF_repeat repeat = prevHuf->repeatMode; - - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); - - /* Prepare nextEntropy assuming reusing the existing table */ - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralsCompression) { - DEBUGLOG(5, "set_basic - disabled"); - hufMetadata->hType = set_basic; - return 0; - } - - /* small ? 
don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Scan input and build symbol stats */ - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Validate the previous Huffman table */ - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } - - /* Build Huffman Tree */ - ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; - { /* Build and write the CTable */ - size_t const newCSize = HUF_estimateCompressedSize( - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); - size_t const hSize = HUF_writeCTable_wksp( - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - /* Check against repeating the previous CTable */ - if (repeat != HUF_repeat_none) { - size_t const oldCSize = HUF_estimateCompressedSize( - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { - DEBUGLOG(5, "set_repeat - smaller"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_repeat; - return 0; - } - } - if (newCSize + hSize >= srcSize) { - DEBUGLOG(5, "set_basic - no gains"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_basic; - return 0; - } - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); - hufMetadata->hType = set_compressed; - nextHuf->repeatMode = HUF_repeat_check; - return hSize; - } - } -} - -/* ZSTD_buildSuperBlockEntropy_sequences() : - * Builds entropy for the super-block sequences. - * Stores symbol compression modes and fse table to fseMetadata. 
- * @return : size of fse tables or error code */ -static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, - const ZSTD_fseCTables_t* prevEntropy, - ZSTD_fseCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); - BYTE* const cTableWksp = countWkspStart + countWkspSize; - const size_t cTableWkspSize = wkspEnd-cTableWksp; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; - - assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); - ZSTD_memset(workspace, 0, wkspSize); - - fseMetadata->lastCountSize = 0; - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { U32 LLtype; - unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->litlengthCTable, - LL_defaultNorm, LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } } - /* build CTable for Offsets */ - { U32 Offtype; - unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, - countWksp, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->offcodeCTable, - OF_defaultNorm, OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } } - /* build CTable for MatchLengths */ - { U32 MLtype; - unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->matchlengthCTable, - ML_defaultNorm, ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } } - assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); - return op-ostart; -} - - -/* ZSTD_buildSuperBlockEntropy() : - * Builds entropy for the super-block. 
- * @return : 0 on success or error code */ -static size_t -ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) -{ - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_disableLiteralsCompression(cctxParams), - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, - &prevEntropy->fse, &nextEntropy->fse, - cctxParams, - &entropyMetadata->fseMetadata, - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); - return 0; -} - /* ZSTD_compressSubBlock_literal() : * Compresses literals section for a sub-block. * When we have to write the Huffman table we will sometimes choose a header @@ -411,8 +132,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* const seqDef* sp = sstart; size_t matchLengthSum = 0; size_t litLengthSum = 0; - /* Only used by assert(), suppress unused variable warnings in production. */ - (void)litLengthSum; + (void)(litLengthSum); /* suppress unused variable warning on some environments */ while (send-sp > 0) { ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); litLengthSum += seqLen.litLength; @@ -605,7 +325,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, const BYTE* codeTable, unsigned maxCode, size_t nbSeq, const FSE_CTable* fseCTable, - const U32* additionalBits, + const U8* additionalBits, short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, void* workspace, size_t wkspSize) { @@ -646,8 +366,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, void* workspace, size_t wkspSize, int writeEntropy) { - size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ size_t cSeqSizeEstimate = 0; + if (nbSeq == 0) return sequencesSectionHeaderSize; cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, nbSeq, fseTables->offcodeCTable, NULL, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, @@ -754,7 +475,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, /* I think there is an optimization opportunity here. * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful * since it recalculates estimate from scratch. - * For example, it would recount literal distribution and symbol codes everytime. + * For example, it would recount literal distribution and symbol codes every time. 
*/ cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, &nextCBlock->entropy, entropyMetadata, @@ -818,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, repcodes_t rep; ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); for (seq = sstart; seq < sp; ++seq) { - rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); } ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); } @@ -833,7 +554,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, unsigned lastBlock) { ZSTD_entropyCTablesMetadata_t entropyMetadata; - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h index 98e359adf5d44..349fc923c355a 100644 --- a/lib/zstd/compress/zstd_cwksp.h +++ b/lib/zstd/compress/zstd_cwksp.h @@ -32,6 +32,10 @@ #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 #endif + +/* Set our tables and aligneds to align by 64 bytes */ +#define ZSTD_CWKSP_ALIGNMENT_BYTES 64 + /*-************************************* * Structures ***************************************/ @@ -114,10 +118,11 @@ typedef enum { * - Tables: these are any of several different datastructures (hash tables, * chain tables, binary trees) that all respect a common format: they are * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). - * Their sizes depend on the cparams. + * Their sizes depend on the cparams. These tables are 64-byte aligned. * * - Aligned: these buffers are used for various purposes that require 4 byte - * alignment, but don't require any initialization before they're used. + * alignment, but don't require any initialization before they're used. These + * buffers are each aligned to 64 bytes. * * - Buffers: these buffers are used for various purposes that don't require * any alignment or initialization before they're used. This means they can @@ -130,8 +135,7 @@ typedef enum { * * 1. Objects * 2. Buffers - * 3. Aligned - * 4. Tables + * 3. Aligned/Tables * * Attempts to reserve objects of different types out of order will fail. */ @@ -184,6 +188,8 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { * Since tables aren't currently redzoned, you don't need to call through this * to figure out how much space you need for the matchState tables. Everything * else is though. + * + * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). */ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { if (size == 0) @@ -191,66 +197,139 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { return size; } -MEM_STATIC void ZSTD_cwksp_internal_advance_phase( - ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { +/* + * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. + * Used to determine the number of bytes required for a given "aligned". + */ +MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { + return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); +} + +/* + * Returns the amount of additional space the cwksp must allocate + * for internal purposes (currently only alignment). 
+ */ +MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { + /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes + * to align the beginning of tables section, as well as another n_2=[0, 63] bytes + * to align the beginning of the aligned section. + * + * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and + * aligneds being sized in multiples of 64 bytes. + */ + size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; + return slackSpace; +} + + +/* + * Return the number of additional bytes required to align a pointer to the given number of bytes. + * alignBytes must be a power of two. + */ +MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { + size_t const alignBytesMask = alignBytes - 1; + size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; + assert((alignBytes & alignBytesMask) == 0); + assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); + return bytes; +} + +/* + * Internal function. Do not use directly. + * Reserves the given number of bytes within the aligned/buffer segment of the wksp, + * which counts from the end of the wksp (as opposed to the object/table segment). + * + * Returns a pointer to the beginning of that space. + */ +MEM_STATIC void* +ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) +{ + void* const alloc = (BYTE*)ws->allocStart - bytes; + void* const bottom = ws->tableEnd; + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + /* the area is reserved from the end of wksp. + * If it overlaps with tableValidEnd, it voids guarantees on values' range */ + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + return alloc; +} + +/* + * Moves the cwksp to the next phase, and does any necessary allocations. + * cwksp initialization must necessarily go through each phase in order. + * Returns a 0 on success, or zstd error + */ +MEM_STATIC size_t +ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) +{ assert(phase >= ws->phase); if (phase > ws->phase) { + /* Going from allocating objects to allocating buffers */ if (ws->phase < ZSTD_cwksp_alloc_buffers && phase >= ZSTD_cwksp_alloc_buffers) { ws->tableValidEnd = ws->objectEnd; } + + /* Going from allocating buffers to allocating aligneds/tables */ if (ws->phase < ZSTD_cwksp_alloc_aligned && phase >= ZSTD_cwksp_alloc_aligned) { - /* If unaligned allocations down from a too-large top have left us - * unaligned, we need to realign our alloc ptr. Technically, this - * can consume space that is unaccounted for in the neededSpace - * calculation. However, I believe this can only happen when the - * workspace is too large, and specifically when it is too large - * by a larger margin than the space that will be consumed. */ - /* TODO: cleaner, compiler warning friendly way to do this??? */ - ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); - if (ws->allocStart < ws->tableValidEnd) { - ws->tableValidEnd = ws->allocStart; + { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. 
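 */
/* [Editorial aside, not part of the patch] A self-contained sketch of the
 * pointer-alignment arithmetic used here. ex_bytes_to_align_up() mirrors what
 * ZSTD_cwksp_bytes_to_align_ptr() is described as computing above (bytes to
 * add to reach the next multiple of align, 0 when already aligned); the
 * concrete addresses are made up for illustration. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static size_t ex_bytes_to_align_up(uintptr_t p, size_t align)
{
    size_t const mask = align - 1;
    return (align - ((size_t)p & mask)) & mask;
}

static void ex_cwksp_alignment(void)
{
    /* An allocStart at an address ending in 0x28 would need 24 bytes to
     * align UP to the next 64-byte boundary... */
    assert(ex_bytes_to_align_up(0x1028, 64) == 24);

    /* ...but the "aligned" area is carved DOWNWARD from allocStart, so the
     * phase-advance code reserves 64 - 24 == 40 bytes to land on the previous
     * 64-byte boundary; when allocStart is already aligned it reserves a full
     * 64 bytes, hence the documented [1, 64] byte range. */
    assert(64 - ex_bytes_to_align_up(0x1028, 64) == 40);
    assert(64 - ex_bytes_to_align_up(0x1040, 64) == 64);
}
/* [End of editorial aside] The patch continues: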
*/ + size_t const bytesToAlign = + ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); + DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); + ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ + RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), + memory_allocation, "aligned phase - alignment initial allocation failed!"); } - } + { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ + void* const alloc = ws->objectEnd; + size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); + void* const objectEnd = (BYTE*)alloc + bytesToAlign; + DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); + RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, + "table phase - alignment initial allocation failed!"); + ws->objectEnd = objectEnd; + ws->tableEnd = objectEnd; /* table area starts being empty */ + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } } } ws->phase = phase; + ZSTD_cwksp_assert_internal_consistency(ws); } + return 0; } /* * Returns whether this object/buffer/etc was allocated in this workspace. */ -MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) +{ return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); } /* * Internal function. Do not use directly. */ -MEM_STATIC void* ZSTD_cwksp_reserve_internal( - ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { +MEM_STATIC void* +ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) +{ void* alloc; - void* bottom = ws->tableEnd; - ZSTD_cwksp_internal_advance_phase(ws, phase); - alloc = (BYTE *)ws->allocStart - bytes; - - if (bytes == 0) + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) { return NULL; + } - DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(alloc >= bottom); - if (alloc < bottom) { - DEBUGLOG(4, "cwksp: alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - if (alloc < ws->tableValidEnd) { - ws->tableValidEnd = alloc; - } - ws->allocStart = alloc; + alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes); return alloc; @@ -259,33 +338,44 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal( /* * Reserves and returns unaligned memory. */ -MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) +{ return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); } /* - * Reserves and returns memory sized on and aligned on sizeof(unsigned). + * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). 
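 */
/* [Editorial aside, not part of the patch] The requested size is rounded up
 * to a multiple of 64 bytes before reservation. A tiny sketch of the assumed
 * round-up (ZSTD_cwksp_align() with a power-of-two alignment); the helper
 * name is hypothetical. */
#include <assert.h>
#include <stddef.h>

static size_t ex_align_size_up(size_t size, size_t align)
{
    size_t const mask = align - 1;
    return (size + mask) & ~mask;
}

static void ex_aligned_alloc_sizes(void)
{
    assert(ex_align_size_up(1,    64) == 64);
    assert(ex_align_size_up(64,   64) == 64);
    assert(ex_align_size_up(65,   64) == 128);
    assert(ex_align_size_up(6148, 64) == 6208);  /* 6 KB + 4 -> next 64-byte multiple */
}
/* [End of editorial aside] The patch continues: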
*/ -MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { - assert((bytes & (sizeof(U32)-1)) == 0); - return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) +{ + void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), + ZSTD_cwksp_alloc_aligned); + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return ptr; } /* - * Aligned on sizeof(unsigned). These buffers have the special property that + * Aligned on 64 bytes. These buffers have the special property that * their values remain constrained, allowing us to re-use them without * memset()-ing them. */ -MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) +{ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; - void* alloc = ws->tableEnd; - void* end = (BYTE *)alloc + bytes; - void* top = ws->allocStart; + void* alloc; + void* end; + void* top; + + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { + return NULL; + } + alloc = ws->tableEnd; + end = (BYTE *)alloc + bytes; + top = ws->allocStart; DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); assert((bytes & (sizeof(U32)-1)) == 0); - ZSTD_cwksp_internal_advance_phase(ws, phase); ZSTD_cwksp_assert_internal_consistency(ws); assert(end <= top); if (end > top) { @@ -296,27 +386,31 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { ws->tableEnd = end; + assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); + assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); return alloc; } /* * Aligned on sizeof(void*). 
+ * Note : should happen only once, at workspace first initialization */ -MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { - size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) +{ + size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); void* alloc = ws->objectEnd; void* end = (BYTE*)alloc + roundedBytes; - DEBUGLOG(5, + DEBUGLOG(4, "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); - assert(((size_t)alloc & (sizeof(void*)-1)) == 0); - assert((bytes & (sizeof(void*)-1)) == 0); + assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0); + assert(bytes % ZSTD_ALIGNOF(void*) == 0); ZSTD_cwksp_assert_internal_consistency(ws); /* we must be in the first phase, no advance is possible */ if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { - DEBUGLOG(4, "cwksp: object alloc failed!"); + DEBUGLOG(3, "cwksp: object alloc failed!"); ws->allocFailed = 1; return NULL; } @@ -328,7 +422,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { return alloc; } -MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) +{ DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); @@ -451,6 +546,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { * Functions Checking Free Space ***************************************/ +/* ZSTD_alignmentSpaceWithinBounds() : + * Returns if the estimated space needed for a wksp is within an acceptable limit of the + * actual amount of space used. + */ +MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, + size_t const estimatedSpace, int resizedWorkspace) { + if (resizedWorkspace) { + /* Resized/newly allocated wksp should have exact bounds */ + return ZSTD_cwksp_used(ws) == estimatedSpace; + } else { + /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes + * than estimatedSpace. See the comments in zstd_cwksp.h for details. 
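For a concrete picture of that slack: with 64-byte alignment, the padding inserted in front of the table area can be anywhere from 0 to 63 bytes depending on where the previous allocations happened to end, so a reused workspace may legitimately consume up to 63 bytes more, or fewer, than the estimate computed up front; the check that follows accepts exactly that window.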
+ */ + return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); + } +} + + MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); } diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c index b0424d23ac57f..76933dea2624e 100644 --- a/lib/zstd/compress/zstd_double_fast.c +++ b/lib/zstd/compress/zstd_double_fast.c @@ -48,10 +48,216 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_doubleFast_generic( +size_t ZSTD_compressBlock_doubleFast_noDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + size_t mLength; + U32 offset; + U32 curr; + + /* how many positions to search before increasing step size */ + const size_t kStepIncr = 1 << kSearchStrength; + /* the position at which to increment the step size if no match is found */ + const BYTE* nextStep; + size_t step; /* the current step size */ + + size_t hl0; /* the long hash at ip */ + size_t hl1; /* the long hash at ip1 */ + + U32 idxl0; /* the long match index for ip */ + U32 idxl1; /* the long match index for ip1 */ + + const BYTE* matchl0; /* the long match for ip */ + const BYTE* matchs0; /* the short match for ip */ + const BYTE* matchl1; /* the long match for ip1 */ + + const BYTE* ip = istart; /* the current position */ + const BYTE* ip1; /* the next position */ + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); + + /* init */ + ip += ((ip - prefixLowest) == 0); + { + U32 const current = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const maxRep = current - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Outer Loop: one iteration per match found and stored */ + while (1) { + step = 1; + nextStep = ip + kStepIncr; + ip1 = ip + step; + + if (ip1 > ilimit) { + goto _cleanup; + } + + hl0 = ZSTD_hashPtr(ip, hBitsL, 8); + idxl0 = hashLong[hl0]; + matchl0 = base + idxl0; + + /* Inner Loop: one iteration per search / position */ + do { + const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 idxs0 = hashSmall[hs0]; + curr = (U32)(ip-base); + matchs0 = base + idxs0; + + hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ + + /* check noDict repcode */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, 
mLength); + goto _match_stored; + } + + hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); + + if (idxl0 > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchl0) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; + offset = (U32)(ip-matchl0); + while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ + goto _match_found; + } + } + + idxl1 = hashLong[hl1]; + matchl1 = base + idxl1; + + if (idxs0 > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(matchs0) == MEM_read32(ip)) { + goto _search_next_long; + } + } + + if (ip1 >= nextStep) { + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + step++; + nextStep += kStepIncr; + } + ip = ip1; + ip1 += step; + + hl0 = hl1; + idxl0 = idxl1; + matchl0 = matchl1; + #if defined(__aarch64__) + PREFETCH_L1(ip+256); + #endif + } while (ip1 <= ilimit); + +_cleanup: + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); + +_search_next_long: + + /* check prefix long +1 match */ + if (idxl1 > prefixLowestIndex) { + if (MEM_read64(matchl1) == MEM_read64(ip1)) { + ip = ip1; + mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; + offset = (U32)(ip-matchl1); + while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ + goto _match_found; + } + } + + /* if no long +1 match, explore the short match we found */ + mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; + offset = (U32)(ip - matchs0); + while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ + + /* fall-through */ + +_match_found: /* requires ip, offset, mLength */ + offset_2 = offset_1; + offset_1 = offset; + + if (step < 4) { + /* It is unsafe to write this value back to the hashtable when ip1 is + * greater than or equal to the new ip we will have after we're done + * processing this match. Rather than perform that test directly + * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler + * more predictable test. The minmatch even if we take a short match is + * 4 bytes, so as long as step, the distance between ip and ip1 + * (initially) is less than 4, we know ip1 < new ip. 
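Spelled out: the catch-up loop moves ip backwards and mLength forwards by the same amount, so the post-match position ip + mLength is at least the pre-catch-up ip plus the 4-byte minimum match; ip1 sits at most step bytes past that same ip, so with step < 4 we get ip1 <= ip + 3 < ip + mLength, i.e. ip1 is strictly below the new ip, which is the condition the guard needs.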
*/ + hashLong[hl1] = (U32)(ip1 - base); + } + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + } + } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const mls /* template */, ZSTD_dictMode_e const dictMode) + U32 const mls /* template */) { ZSTD_compressionParameters const* cParams = &ms->cParams; U32* const hashLong = ms->hashTable; @@ -72,54 +278,30 @@ size_t ZSTD_compressBlock_doubleFast_generic( U32 offsetSaved = 0; const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = - dictMode == ZSTD_dictMatchState ? - &dms->cParams : NULL; - const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? - dms->hashTable : NULL; - const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? - dms->chainTable : NULL; - const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? - dictBase + dictStartIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? - dictCParams->hashLog : hBitsL; - const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? 
- dictCParams->chainLog : hBitsS; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams; + const U32* const dictHashLong = dms->hashTable; + const U32* const dictHashSmall = dms->chainTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); + const U32 dictHBitsL = dictCParams->hashLog; + const U32 dictHBitsS = dictCParams->chainLog; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); - - assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); /* if a dictionary is attached, it must be within window range */ - if (dictMode == ZSTD_dictMatchState) { - assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); - } + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); /* init */ ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const curr = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); - U32 const maxRep = curr - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } + + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ @@ -135,29 +317,18 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* matchLong = base + matchIndexL; const BYTE* match = base + matchIndexS; const U32 repIndex = curr + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? + const BYTE* repMatch = (repIndex < prefixLowestIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ - /* check dictMatchState repcode */ - if (dictMode == ZSTD_dictMatchState - && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + /* check repcode */ + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } - - /* check noDict repcode */ - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); goto _match_stored; } @@ -169,7 +340,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ goto _match_found; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dictMatchState long match */ U32 const dictMatchIndexL = dictHashLong[dictHL]; const BYTE* dictMatchL = dictBase + dictMatchIndexL; @@ -187,7 +358,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( if (MEM_read32(match) == MEM_read32(ip)) { goto _search_next_long; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dictMatchState short match */ U32 const dictMatchIndexS = dictHashSmall[dictHS]; match = dictBase + dictMatchIndexS; @@ -220,7 +391,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ goto _match_found; } - } else if (dictMode == ZSTD_dictMatchState) { + } else { /* check dict long +1 match */ U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; @@ -234,7 +405,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( } } } /* if no long +1 match, explore the short match we found */ - if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + if (matchIndexS < prefixLowestIndex) { mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; offset = (U32)(curr - matchIndexS); while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ @@ -248,7 +419,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); _match_stored: /* match found */ @@ -266,43 +437,27 @@ size_t ZSTD_compressBlock_doubleFast_generic( } /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < prefixLowestIndex ? - dictBase + repIndex2 - dictIndexDelta : - base + repIndex2; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? 
dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } - - if (dictMode == ZSTD_noDict) { - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); - ip += rLength; + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; anchor = ip; - continue; /* faster when present ... (?) 
*/ - } } } + continue; + } + break; + } + } } /* while (ip < ilimit) */ /* save reps for next block */ @@ -313,6 +468,24 @@ size_t ZSTD_compressBlock_doubleFast_generic( return (size_t)(iend - anchor); } +#define ZSTD_GEN_DFAST_FN(dictMode, mls) \ + static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ + } + +ZSTD_GEN_DFAST_FN(noDict, 4) +ZSTD_GEN_DFAST_FN(noDict, 5) +ZSTD_GEN_DFAST_FN(noDict, 6) +ZSTD_GEN_DFAST_FN(noDict, 7) + +ZSTD_GEN_DFAST_FN(dictMatchState, 4) +ZSTD_GEN_DFAST_FN(dictMatchState, 5) +ZSTD_GEN_DFAST_FN(dictMatchState, 6) +ZSTD_GEN_DFAST_FN(dictMatchState, 7) + size_t ZSTD_compressBlock_doubleFast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -323,13 +496,13 @@ size_t ZSTD_compressBlock_doubleFast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); } } @@ -343,13 +516,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); } } @@ -385,7 +558,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ @@ -407,12 +580,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ - & (repIndex > dictStartIndex)) + & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ && 
(MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); } else { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; @@ -423,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); @@ -448,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( } offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); } else { ip += ((ip-anchor) >> kSearchStrength) + 1; @@ -475,12 +648,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ - & (repIndex2 > dictStartIndex)) + & (offset_2 <= current2 - dictStartIndex)) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; ip += repLength2; @@ -498,6 +671,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( return (size_t)(iend - anchor); } +ZSTD_GEN_DFAST_FN(extDict, 4) +ZSTD_GEN_DFAST_FN(extDict, 5) +ZSTD_GEN_DFAST_FN(extDict, 6) +ZSTD_GEN_DFAST_FN(extDict, 7) size_t ZSTD_compressBlock_doubleFast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -508,12 +685,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); } } diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c index 96b7d48e2868e..a752e6beab52e 100644 --- a/lib/zstd/compress/zstd_fast.c +++ b/lib/zstd/compress/zstd_fast.c @@ -43,145 +43,294 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, } +/* + * If you squint hard enough (and ignore repcodes), the search operation at any + * given position is broken into 4 stages: + * + * 1. Hash (map position to hash value via input read) + * 2. Lookup (map hash val to index via hashtable read) + * 3. Load (map index to value at that position via input read) + * 4. Compare + * + * Each of these steps involves a memory read at an address which is computed + * from the previous step. This means these steps must be sequenced and their + * latencies are cumulative. + * + * Rather than do 1->2->3->4 sequentially for a single position before moving + * onto the next, this implementation interleaves these operations across the + * next few positions: + * + * R = Repcode Read & Compare + * H = Hash + * T = Table Lookup + * M = Match Read & Compare + * + * Pos | Time --> + * ----+------------------- + * N | ... M + * N+1 | ... TM + * N+2 | R H T M + * N+3 | H TM + * N+4 | R H T M + * N+5 | H ... + * N+6 | R ... + * + * This is very much analogous to the pipelining of execution in a CPU. And just + * like a CPU, we have to dump the pipeline when we find a match (i.e., take a + * branch). + * + * When this happens, we throw away our current state, and do the following prep + * to re-enter the loop: + * + * Pos | Time --> + * ----+------------------- + * N | H T + * N+1 | H + * + * This is also the work we do at the beginning to enter the loop initially. 
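Where the pipeline diagram ends, a self-contained toy version of the same shape may help. The snippet below is plain C with illustrative names (toy_hash, toy_find_first_match) and a trivial multiplicative hash; it is not the zstd match finder itself, only the interleaving idea: the hash and table lookup for position N+1 are issued before the compare for position N has resolved.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define TOY_HLOG  12
#define TOY_HSIZE (1u << TOY_HLOG)

static uint32_t toy_read32(const uint8_t* p) { uint32_t v; memcpy(&v, p, 4); return v; }
static uint32_t toy_hash(uint32_t v)         { return (v * 2654435761u) >> (32 - TOY_HLOG); }

/* Scan positions 0 .. len-5 and return the first position whose 4-byte group
 * also occurs at an earlier position (returns len if none). The point is the
 * shape of the loop: while the candidate for `pos` is loaded and compared,
 * the hash and table lookup for `pos + 1` have already been issued. */
static size_t toy_find_first_match(const uint8_t* src, size_t len)
{
    uint32_t table[TOY_HSIZE];
    memset(table, 0xFF, sizeof(table));               /* 0xFFFFFFFF == empty slot */

    if (len >= 5) {
        size_t   h0  = toy_hash(toy_read32(src));     /* hash + lookup for pos 0 ... */
        uint32_t idx = table[h0];                     /* ... done before entering the loop */
        size_t   pos = 0;

        while (pos + 1 + 4 <= len) {
            size_t   const h1      = toy_hash(toy_read32(src + pos + 1)); /* hash next pos early */
            uint32_t const idxNext = table[h1];                           /* lookup next pos early */

            table[h0] = (uint32_t)pos;                /* insert current position */

            if (idx != 0xFFFFFFFFu                    /* load + compare for current pos */
                && toy_read32(src + idx) == toy_read32(src + pos))
                return pos;                           /* match found: "dump the pipeline" */

            h0  = h1;                                 /* rotate pipeline state to pos + 1 */
            idx = idxNext;
            pos += 1;
        }
    }
    return len;
}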
+ */ FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_fast_generic( +ZSTD_compressBlock_fast_noDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const mls) + U32 const mls, U32 const hasStep) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ - size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2; const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; - /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ - const BYTE* ip0 = istart; - const BYTE* ip1; - const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; + + const BYTE* anchor = istart; + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* ip2; + const BYTE* ip3; + U32 current0; + + U32 rep_offset1 = rep[0]; + U32 rep_offset2 = rep[1]; U32 offsetSaved = 0; - /* init */ + size_t hash0; /* hash for ip0 */ + size_t hash1; /* hash for ip1 */ + U32 idx; /* match idx for ip0 */ + U32 mval; /* src value at match idx */ + + U32 offcode; + const BYTE* match0; + size_t mLength; + + /* ip0 and ip1 are always adjacent. The targetLength skipping and + * uncompressibility acceleration is applied to every other position, + * matching the behavior of #1562. step therefore represents the gap + * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */ + size_t step; + const BYTE* nextStep; + const size_t kStepIncr = (1 << (kSearchStrength - 1)); + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); - ip1 = ip0 + 1; { U32 const curr = (U32)(ip0 - base); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); U32 const maxRep = curr - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; + if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; } - /* Main Search Loop */ -#ifdef __INTEL_COMPILER - /* From intel 'The vector pragma indicates that the loop should be - * vectorized if it is legal to do so'. 
Can be used together with - * #pragma ivdep (but have opted to exclude that because intel - * warns against using it).*/ - #pragma vector always -#endif - while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ - size_t mLength; - BYTE const* ip2 = ip0 + 2; - size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); - U32 const val0 = MEM_read32(ip0); - size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); - U32 const val1 = MEM_read32(ip1); - U32 const current0 = (U32)(ip0-base); - U32 const current1 = (U32)(ip1-base); - U32 const matchIndex0 = hashTable[h0]; - U32 const matchIndex1 = hashTable[h1]; - BYTE const* repMatch = ip2 - offset_1; - const BYTE* match0 = base + matchIndex0; - const BYTE* match1 = base + matchIndex1; - U32 offcode; - -#if defined(__aarch64__) - PREFETCH_L1(ip0+256); -#endif - - hashTable[h0] = current0; /* update hash table */ - hashTable[h1] = current1; /* update hash table */ - - assert(ip0 + 1 == ip1); - - if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { - mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; - ip0 = ip2 - mLength; - match0 = repMatch - mLength; + /* start each op */ +_start: /* Requires: ip0 */ + + step = stepSize; + nextStep = ip0 + kStepIncr; + + /* calculate positions, ip0 - anchor == 0, so we skip step calc */ + ip1 = ip0 + 1; + ip2 = ip0 + step; + ip3 = ip2 + 1; + + if (ip3 >= ilimit) { + goto _cleanup; + } + + hash0 = ZSTD_hashPtr(ip0, hlog, mls); + hash1 = ZSTD_hashPtr(ip1, hlog, mls); + + idx = hashTable[hash0]; + + do { + /* load repcode match for ip[2]*/ + const U32 rval = MEM_read32(ip2 - rep_offset1); + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* check repcode at ip[2] */ + if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) { + ip0 = ip2; + match0 = ip0 - rep_offset1; + mLength = ip0[-1] == match0[-1]; + ip0 -= mLength; + match0 -= mLength; + offcode = STORE_REPCODE_1; mLength += 4; - offcode = 0; goto _match; } - if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { - /* found a regular match */ - goto _offset; + + /* load match for ip[0] */ + if (idx >= prefixStartIndex) { + mval = MEM_read32(base + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ } - if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { - /* found a regular match after one literal */ - ip0 = ip1; - match0 = match1; + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ goto _offset; } - { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; - assert(step >= 2); - ip0 += step; - ip1 += step; - continue; + + /* lookup ip[1] */ + idx = hashTable[hash1]; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip3; + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* load match for ip[0] */ + if (idx >= prefixStartIndex) { + mval = MEM_read32(base + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. 
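Loading the current input and flipping its low bit yields a value that can never equal MEM_read32(ip0), so the match check that follows stays a single unconditional 32-bit compare instead of re-testing whether the candidate index was inside the prefix.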
*/ } -_offset: /* Requires: ip0, match0 */ - /* Compute the offset code */ - offset_2 = offset_1; - offset_1 = (U32)(ip0-match0); - offcode = offset_1 + ZSTD_REP_MOVE; - mLength = 4; - /* Count the backwards match length */ - while (((ip0>anchor) & (match0>prefixStart)) - && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ -_match: /* Requires: ip0, match0, offcode */ - /* Count the forward length */ - mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); - ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); - /* match found */ - ip0 += mLength; - anchor = ip0; + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + goto _offset; + } - if (ip0 <= ilimit) { - /* Fill Table */ - assert(base+current0+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - - if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ - while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); - anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } } } - ip1 = ip0 + 1; - } + /* lookup ip[1] */ + idx = hashTable[hash1]; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip0 + step; + ip3 = ip1 + step; + + /* calculate step */ + if (ip2 >= nextStep) { + step++; + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + nextStep += kStepIncr; + } + } while (ip3 < ilimit); + +_cleanup: + /* Note that there are probably still a couple positions we could search. + * However, it seems to be a meaningful performance hit to try to search + * them. So let's not. */ /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; + rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; + rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; /* Return the last literals size */ return (size_t)(iend - anchor); + +_offset: /* Requires: ip0, idx */ + + /* Compute the offset code. */ + match0 = base + idx; + rep_offset2 = rep_offset1; + rep_offset1 = (U32)(ip0-match0); + offcode = STORE_OFFSET(rep_offset1); + mLength = 4; + + /* Count the backwards match length. */ + while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) { + ip0--; + match0--; + mLength++; + } + +_match: /* Requires: ip0, match0, offcode */ + + /* Count the forward length. */ + mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend); + + ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); + + ip0 += mLength; + anchor = ip0; + + /* write next hash table entry */ + if (ip1 < ip0) { + hashTable[hash1] = (U32)(ip1 - base); + } + + /* Fill table and check for immediate repcode. 
*/ + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4; + { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + + goto _start; } +#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \ + static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \ + } + +ZSTD_GEN_FAST_FN(noDict, 4, 1) +ZSTD_GEN_FAST_FN(noDict, 5, 1) +ZSTD_GEN_FAST_FN(noDict, 6, 1) +ZSTD_GEN_FAST_FN(noDict, 7, 1) + +ZSTD_GEN_FAST_FN(noDict, 4, 0) +ZSTD_GEN_FAST_FN(noDict, 5, 0) +ZSTD_GEN_FAST_FN(noDict, 6, 0) +ZSTD_GEN_FAST_FN(noDict, 7, 0) size_t ZSTD_compressBlock_fast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -189,24 +338,40 @@ size_t ZSTD_compressBlock_fast( { U32 const mls = ms->cParams.minMatch; assert(ms->dictMatchState == NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + if (ms->cParams.targetLength > 1) { + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize); + } + } else { + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize); + } + } } FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_fast_dictMatchState_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) + void const* src, size_t srcSize, U32 const mls, U32 const hasStep) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; @@ -242,6 +407,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( assert(endIndex - prefixStartIndex <= maxDistance); 
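The ZSTD_GEN_FAST_FN / ZSTD_GEN_DFAST_FN macros above are the patch's template-emulation device: one force-inlined generic body takes its variant parameters as compile-time constants, and a macro stamps out a named, fully specialized wrapper per (mls, step) combination for the dispatcher's switch to select. A reduced sketch of the same pattern, with made-up names (sum_generic, GEN_SUM_FN) not taken from zstd:

#include <stddef.h>
#include <stdint.h>

/* Generic body: `stride` is a compile-time constant in each generated wrapper,
 * so the compiler can specialize the loop per instantiation. */
static inline uint64_t sum_generic(const uint32_t* v, size_t n, unsigned stride)
{
    uint64_t acc = 0;
    size_t i;
    for (i = 0; i < n; i += stride)
        acc += v[i];
    return acc;
}

#define GEN_SUM_FN(stride)                                            \
    static uint64_t sum_stride_##stride(const uint32_t* v, size_t n)  \
    {                                                                 \
        return sum_generic(v, n, stride);                             \
    }

GEN_SUM_FN(1)
GEN_SUM_FN(2)
GEN_SUM_FN(4)

/* Runtime dispatcher, mirroring the switch(mls) pattern in the patch. */
static uint64_t sum_dispatch(const uint32_t* v, size_t n, unsigned stride)
{
    switch (stride) {
    default:
    case 1: return sum_stride_1(v, n);
    case 2: return sum_stride_2(v, n);
    case 4: return sum_stride_4(v, n);
    }
}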
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + (void)hasStep; /* not currently specialized on whether it's accelerated */ + /* ensure there will be no underflow * when translating a dict index into a local index */ assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); @@ -272,7 +439,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); } else if ( (matchIndex <= prefixStartIndex) ) { size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); U32 const dictMatchIndex = dictHashTable[dictHash]; @@ -292,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); } } else if (MEM_read32(match) != MEM_read32(ip)) { /* it's not a match, and we're not going to check the dictionary */ @@ -307,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); } /* match found */ @@ -332,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; anchor = ip; @@ -351,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( return (size_t)(iend - anchor); } + +ZSTD_GEN_FAST_FN(dictMatchState, 4, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 5, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 6, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 7, 0) + size_t ZSTD_compressBlock_fast_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -361,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize); } } static size_t ZSTD_compressBlock_fast_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) + void const* src, size_t srcSize, U32 const mls, U32 const hasStep) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; @@ -398,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; + (void)hasStep; /* not currently specialized on whether it's accelerated */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); /* switch to "regular" variant if extDict is invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ @@ -416,14 +591,14 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const repMatch = repBase + repIndex; hashTable[h] = curr; /* update hash table */ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); - assert(offset_1 <= curr +1); /* check repIndex */ - if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ + & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength); ip += rLength; anchor = ip; } else { @@ -439,7 +614,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; /* update offset history */ - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ip += mLength; anchor = ip; } } @@ -453,12 +628,12 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; anchor = ip; @@ -475,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( return (size_t)(iend - anchor); } +ZSTD_GEN_FAST_FN(extDict, 4, 0) +ZSTD_GEN_FAST_FN(extDict, 5, 0) +ZSTD_GEN_FAST_FN(extDict, 6, 0) +ZSTD_GEN_FAST_FN(extDict, 7, 0) size_t ZSTD_compressBlock_fast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -485,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize); case 5 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize); case 6 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize); case 7 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize); } } diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c index fb54d4e28a2bc..0298a01a7504a 100644 --- a/lib/zstd/compress/zstd_lazy.c +++ b/lib/zstd/compress/zstd_lazy.c @@ -61,7 +61,7 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms, * assumption : curr >= btlow == (curr - btmask) * doesn't fail */ static void -ZSTD_insertDUBT1(ZSTD_matchState_t* ms, +ZSTD_insertDUBT1(const ZSTD_matchState_t* ms, U32 
curr, const BYTE* inputEnd, U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode) @@ -151,7 +151,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, static size_t ZSTD_DUBT_findBetterDictMatch ( - ZSTD_matchState_t* ms, + const ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iend, size_t* offsetPtr, size_t bestLength, @@ -197,8 +197,8 @@ ZSTD_DUBT_findBetterDictMatch ( U32 matchIndex = dictMatchIndex + dictIndexDelta; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", - curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); } if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ break; /* drop, to guarantee consistency (miss a little bit of compression) */ @@ -218,7 +218,7 @@ ZSTD_DUBT_findBetterDictMatch ( } if (bestLength >= MINMATCH) { - U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", curr, (U32)bestLength, (U32)*offsetPtr, mIndex); } @@ -328,7 +328,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ if (dictMode == ZSTD_dictMatchState) { nbCompares = 0; /* in addition to avoiding checking any @@ -368,7 +368,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ if (bestLength >= MINMATCH) { - U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", curr, (U32)bestLength, (U32)*offsetPtr, mIndex); } @@ -391,91 +391,9 @@ ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); } - -static size_t -ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); - } -} - - -static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - 
const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); - } -} - - -static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); - } -} - - - /* ********************************* -* Hash Chain +* Dedicated dict search ***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( - ZSTD_matchState_t* ms, - const ZSTD_compressionParameters* const cParams, - const BYTE* ip, U32 const mls) -{ - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; - U32* const chainTable = ms->chainTable; - const U32 chainMask = (1 << cParams->chainLog) - 1; - const BYTE* const base = ms->window.base; - const U32 target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - ms->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -} void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) { @@ -485,7 +403,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B U32* const chainTable = ms->chainTable; U32 const chainSize = 1 << ms->cParams.chainLog; U32 idx = ms->nextToUpdate; - U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const minChain = chainSize < target - idx ? target - chainSize : idx; U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; U32 const cacheSize = bucketSize - 1; U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; @@ -499,13 +417,12 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; U32* const tmpHashTable = hashTable; U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); - U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; U32 const tmpMinChain = tmpChainSize < target ? 
target - tmpChainSize : idx; - U32 hashIdx; assert(ms->cParams.chainLog <= 24); - assert(ms->cParams.hashLog >= ms->cParams.chainLog); + assert(ms->cParams.hashLog > ms->cParams.chainLog); assert(idx != 0); assert(tmpMinChain <= minChain); @@ -536,7 +453,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B if (count == cacheSize) { for (count = 0; count < chainLimit;) { if (i < minChain) { - if (!i || countBeyondMinChain++ > cacheSize) { + if (!i || ++countBeyondMinChain > cacheSize) { /* only allow pulling `cacheSize` number of entries * into the cache or chainTable beyond `minChain`, * to replace the entries pulled out of the @@ -592,10 +509,143 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B ms->nextToUpdate = target; } +/* Returns the longest match length found in the dedicated dict search structure. + * If none are longer than the argument ml, then ml will be returned. + */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, + const ZSTD_matchState_t* const dms, + const BYTE* const ip, const BYTE* const iLimit, + const BYTE* const prefixStart, const U32 curr, + const U32 dictLimit, const size_t ddsIdx) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + U32 matchIndex; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? 
chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + return ml; +} + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} /* inlining is important to hardwire a hot branch (template emulation) */ FORCE_INLINE_TEMPLATE -size_t ZSTD_HcFindBestMatch_generic ( +size_t ZSTD_HcFindBestMatch( ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, @@ -653,7 +703,7 @@ size_t ZSTD_HcFindBestMatch_generic ( /* save best solution */ if (currentMl > ml) { ml = currentMl; - *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + *offsetPtr = STORE_OFFSET(curr - matchIndex); if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } @@ -663,90 +713,8 @@ size_t ZSTD_HcFindBestMatch_generic ( assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */ if (dictMode == ZSTD_dedicatedDictSearch) { - const U32 ddsLowestIndex = dms->window.dictLimit; - const BYTE* const ddsBase = dms->window.base; - const BYTE* const ddsEnd = dms->window.nextSrc; - const U32 ddsSize = (U32)(ddsEnd - ddsBase); - const U32 ddsIndexDelta = dictLimit - ddsSize; - const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); - const U32 bucketLimit = nbAttempts < bucketSize - 1 ? 
nbAttempts : bucketSize - 1; - U32 ddsAttempt; - - for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { - PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); - } - - { - U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; - U32 const chainIndex = chainPackedPointer >> 8; - - PREFETCH_L1(&dms->chainTable[chainIndex]); - } - - for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { - size_t currentMl=0; - const BYTE* match; - matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; - match = ddsBase + matchIndex; - - if (!matchIndex) { - return ml; - } - - /* guaranteed by table construction */ - (void)ddsLowestIndex; - assert(matchIndex >= ddsLowestIndex); - assert(match+4 <= ddsEnd); - if (MEM_read32(match) == MEM_read32(ip)) { - /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) { - /* best possible, avoids read overflow on next attempt */ - return ml; - } - } - } - - { - U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; - U32 chainIndex = chainPackedPointer >> 8; - U32 const chainLength = chainPackedPointer & 0xFF; - U32 const chainAttempts = nbAttempts - ddsAttempt; - U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; - U32 chainAttempt; - - for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { - PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); - } - - for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { - size_t currentMl=0; - const BYTE* match; - matchIndex = dms->chainTable[chainIndex]; - match = ddsBase + matchIndex; - - /* guaranteed by table construction */ - assert(matchIndex >= ddsLowestIndex); - assert(match+4 <= ddsEnd); - if (MEM_read32(match) == MEM_read32(ip)) { - /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - } - } + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); } else if (dictMode == ZSTD_dictMatchState) { const U32* const dmsChainTable = dms->chainTable; const U32 dmsChainSize = (1 << dms->cParams.chainLog); @@ -770,7 +738,8 @@ size_t ZSTD_HcFindBestMatch_generic ( /* save best solution */ if (currentMl > ml) { ml = currentMl; - *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + assert(curr > matchIndex + dmsIndexDelta); + *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } @@ -783,75 +752,725 @@ size_t ZSTD_HcFindBestMatch_generic ( return ml; } +/* ********************************* +* (SIMD) Row-based matchfinder +***********************************/ +/* Constants for row-based hash */ +#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ +#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ +#define 
ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) +#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */ + +#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1) -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) +typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */ + +/* ZSTD_VecMask_next(): + * Starting from the LSB, returns the idx of the next non-zero bit. + * Basically counting the nb of trailing zeroes. + */ +static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { + assert(val != 0); +# if (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)))) + if (sizeof(size_t) == 4) { + U32 mostSignificantWord = (U32)(val >> 32); + U32 leastSignificantWord = (U32)val; + if (leastSignificantWord == 0) { + return 32 + (U32)__builtin_ctz(mostSignificantWord); + } else { + return (U32)__builtin_ctz(leastSignificantWord); + } + } else { + return (U32)__builtin_ctzll(val); + } +# else + /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count + * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer + */ + val = ~val & (val - 1ULL); /* Lowest set bit mask */ + val = val - ((val >> 1) & 0x5555555555555555); + val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); + return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); +# endif +} + +/* ZSTD_rotateRight_*(): + * Rotates a bitfield to the right by "count" bits. + * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts + */ +FORCE_INLINE_TEMPLATE +U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { + assert(count < 64); + count &= 0x3F; /* for fickle pattern recognition */ + return (value >> count) | (U64)(value << ((0U - count) & 0x3F)); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { + assert(count < 32); + count &= 0x1F; /* for fickle pattern recognition */ + return (value >> count) | (U32)(value << ((0U - count) & 0x1F)); +} + +FORCE_INLINE_TEMPLATE +U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { + assert(count < 16); + count &= 0x0F; /* for fickle pattern recognition */ + return (value >> count) | (U16)(value << ((0U - count) & 0x0F)); +} + +/* ZSTD_row_nextIndex(): + * Returns the next index to insert at within a tagTable row, and updates the "head" + * value to reflect the update. Essentially cycles backwards from [0, {entries per row}) + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { + U32 const next = (*tagRow - 1) & rowMask; + *tagRow = (BYTE)next; + return next; +} + +/* ZSTD_isAligned(): + * Checks that a pointer is aligned to "align" bytes which must be a power of 2. + */ +MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { + assert((align & (align - 1)) == 0); + return (((size_t)ptr) & (align - 1)) == 0; +} + +/* ZSTD_row_prefetch(): + * Performs prefetching for the hashTable and tagTable at a given row. 
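+ * As a rough sketch (mirroring the callers below), the relRow argument is
+ * derived from a position's hash, which is split into a row index and a
+ * 1-byte tag:
+ *
+ *     U32 const hash   = (U32)ZSTD_hashPtr(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ *     U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;   (row start, in entries)
+ *     U32 const tag    = hash & ZSTD_ROW_HASH_TAG_MASK;                (1-byte tag for the tagTable)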
+ */ +FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { + PREFETCH_L1(hashTable + relRow); + if (rowLog >= 5) { + PREFETCH_L1(hashTable + relRow + 16); + /* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */ + } + PREFETCH_L1(tagTable + relRow); + if (rowLog == 6) { + PREFETCH_L1(tagTable + relRow + 32); + } + assert(rowLog == 4 || rowLog == 5 || rowLog == 6); + assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */ + assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on correct multiple of bytes (32,64,128) */ +} + +/* ZSTD_row_fillHashCache(): + * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, + * but not beyond iLimit. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, + U32 const rowLog, U32 const mls, + U32 idx, const BYTE* const iLimit) { - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + U32 const* const hashTable = ms->hashTable; + U16 const* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); + U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); + + for (; idx < lim; ++idx) { + U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; } + + DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1], + ms->hashCache[2], ms->hashCache[3], ms->hashCache[4], + ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]); } +/* ZSTD_row_nextCachedHash(): + * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at + * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, + U16 const* tagTable, BYTE const* base, + U32 idx, U32 const hashLog, + U32 const rowLog, U32 const mls) +{ + U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; + cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash; + return hash; + } +} -static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) +/* ZSTD_row_update_internalImpl(): + * Updates the hash table with positions starting from updateStartIdx until updateEndIdx. 
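+ * When useCache is nonzero, the hash for each position is taken from the
+ * small rolling cache filled by ZSTD_row_fillHashCache(); one step of that
+ * cache, as a sketch of ZSTD_row_nextCachedHash() above:
+ *
+ *     U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];           (hash of base + idx)
+ *     cache[idx & ZSTD_ROW_HASH_CACHE_MASK] =
+ *         (U32)ZSTD_hashPtr(base + idx + ZSTD_ROW_HASH_CACHE_SIZE,
+ *                           hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);     (refill one slot ahead)
+ *
+ * so each row is prefetched ZSTD_ROW_HASH_CACHE_SIZE positions before it is
+ * written to.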
+ */ +FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, + U32 updateStartIdx, U32 const updateEndIdx, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) { - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + const BYTE* const base = ms->window.base; + + DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx); + for (; updateStartIdx < updateEndIdx; ++updateStartIdx) { + U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls) + : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. + Explicit cast allows us to get exact desired position within each row */ + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + + assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); + ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; + row[pos] = updateStartIdx; } } +/* ZSTD_row_update_internal(): + * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate. + * Skips sections of long matches as is necessary. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) +{ + U32 idx = ms->nextToUpdate; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + const U32 kSkipThreshold = 384; + const U32 kMaxMatchStartPositionsToUpdate = 96; + const U32 kMaxMatchEndPositionsToUpdate = 32; + + if (useCache) { + /* Only skip positions when using hash cache, i.e. + * if we are loading a dict, don't skip anything. + * If we decide to skip, then we only update a set number + * of positions at the beginning and end of the match. + */ + if (UNLIKELY(target - idx > kSkipThreshold)) { + U32 const bound = idx + kMaxMatchStartPositionsToUpdate; + ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache); + idx = target - kMaxMatchEndPositionsToUpdate; + ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1); + } + } + assert(target >= idx); + ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache); + ms->nextToUpdate = target; +} -static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) +/* ZSTD_row_update(): + * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary + * processing. 
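+ * A single call inserts every position from ms->nextToUpdate up to (but not
+ * including) ip, with no skipping. A dictionary loader could therefore do,
+ * as a hypothetical sketch (dictStart/dictEnd are illustrative names; the
+ * actual call site lives elsewhere in the compressor):
+ *
+ *     if (dictEnd - dictStart >= 8)
+ *         ZSTD_row_update(ms, dictEnd - 8);   (index the whole dictionary)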
+ */ +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); + const U32 rowMask = (1u << rowLog) - 1; + const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); + + DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); +} + +#if defined(ZSTD_ARCH_X86_SSE2) +FORCE_INLINE_TEMPLATE ZSTD_VecMask +ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head) { - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + const __m128i comparisonMask = _mm_set1_epi8((char)tag); + int matches[4] = {0}; + int i; + assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4); + for (i=0; i> chunkSize; + do { + size_t chunk = MEM_readST(&src[i]); + chunk ^= splatChar; + chunk = (((chunk | x80) - x01) | chunk) & x80; + matches <<= chunkSize; + matches |= (chunk * extractMagic) >> shiftAmount; + i -= chunkSize; + } while (i >= 0); + } else { /* big endian: reverse bits during extraction */ + const size_t msb = xFF ^ (xFF >> 1); + const size_t extractMagic = (msb / 0x1FF) | msb; + do { + size_t chunk = MEM_readST(&src[i]); + chunk ^= splatChar; + chunk = (((chunk | x80) - x01) | chunk) & x80; + matches <<= chunkSize; + matches |= ((chunk >> 7) * extractMagic) >> shiftAmount; + i -= chunkSize; + } while (i >= 0); + } + matches = ~matches; + if (rowEntries == 16) { + return ZSTD_rotateRight_U16((U16)matches, head); + } else if (rowEntries == 32) { + return ZSTD_rotateRight_U32((U32)matches, head); + } else { + return ZSTD_rotateRight_U64((U64)matches, head); + } + } +#endif +} -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( +/* The high-level approach of the SIMD row based match finder is as follows: + * - Figure out where to insert the new entry: + * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" + * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines + * which row to insert into. + * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can + * be considered as a circular buffer with a "head" index that resides in the tagTable. + * - Also insert the "tag" into the equivalent row and position in the tagTable. + * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. + * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, + * for alignment/performance reasons, leaving some bytes unused. + * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and + * generate a bitfield that we can cycle through to check the collisions in the hash table. + * - Pick the longest match. 
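+ *
+ *   In code form the probe loop looks roughly like this (a sketch of
+ *   ZSTD_RowFindBestMatch() below):
+ *
+ *     ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
+ *     for (; matches > 0; matches &= matches - 1) {           (visit each set bit)
+ *         U32 const pos        = (head + ZSTD_VecMask_next(matches)) & rowMask;
+ *         U32 const matchIndex = row[pos];                    (candidate position)
+ *         ... check the candidate with ZSTD_count() and keep the longest match ...
+ *     }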
+ */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_RowFindBestMatch( ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode, + const U32 rowLog) { - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32* const hashCache = ms->hashCache; + const U32 hashLog = ms->rowHashLog; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 rowEntries = (1U << rowLog); + const U32 rowMask = rowEntries - 1; + const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ + U32 nbAttempts = 1U << cappedSearchLog; + size_t ml=4-1; + + /* DMS/DDS variables that may be referenced laster */ + const ZSTD_matchState_t* const dms = ms->dictMatchState; + + /* Initialize the following variables to satisfy static analyzer */ + size_t ddsIdx = 0; + U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */ + U32 dmsTag = 0; + U32* dmsRow = NULL; + BYTE* dmsTagRow = NULL; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + { /* Prefetch DDS hashtable entry */ + ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG; + PREFETCH_L1(&dms->hashTable[ddsIdx]); + } + ddsExtraAttempts = cParams->searchLog > rowLog ? 
1U << (cParams->searchLog - rowLog) : 0; + } + + if (dictMode == ZSTD_dictMatchState) { + /* Prefetch DMS rows */ + U32* const dmsHashTable = dms->hashTable; + U16* const dmsTagTable = dms->tagTable; + U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; + dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow); + dmsRow = dmsHashTable + dmsRelRow; + ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog); + } + + /* Update the hashTable and tagTable up to (but not including) ip */ + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); + { /* Get the hash for ip, compute the appropriate row */ + U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); + U32 const head = *tagRow & rowMask; + U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); + + /* Cycle through the matches and prefetch */ + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = row[matchPos]; + assert(numMatches < rowEntries); + if (matchIndex < lowLimit) + break; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + PREFETCH_L1(base + matchIndex); + } else { + PREFETCH_L1(dictBase + matchIndex); + } + matchBuffer[numMatches++] = matchIndex; + } + + /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop + in ZSTD_row_update_internal() at the next search. */ + { + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; + row[pos] = ms->nextToUpdate++; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex < curr); + assert(matchIndex >= lowLimit); + + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* Save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = STORE_OFFSET(curr - matchIndex); + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + + assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. 
*/ + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + /* TODO: Measure and potentially add prefetching to DMS */ + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + + { U32 const head = *dmsTagRow & rowMask; + U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); + + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = dmsRow[matchPos]; + if (matchIndex < dmsLowestIndex) + break; + PREFETCH_L1(dmsBase + matchIndex); + matchBuffer[numMatches++] = matchIndex; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex >= dmsLowestIndex); + assert(matchIndex < curr); + + { const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + } + + if (currentMl > ml) { + ml = currentMl; + assert(curr > matchIndex + dmsIndexDelta); + *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); + if (ip+currentMl == iLimit) break; + } + } + } } + return ml; } +/* + * Generate search functions templated on (dictMode, mls, rowLog). + * These functions are outlined for code size & compilation time. + * ZSTD_searchMax() dispatches to the correct implementation function. + * + * TODO: The start of the search function involves loading and calculating a + * bunch of constants from the ZSTD_matchState_t. These computations could be + * done in an initialization function, and saved somewhere in the match state. + * Then we could pass a pointer to the saved state instead of the match state, + * and avoid duplicate computations. + * + * TODO: Move the match re-winding into searchMax. This improves compression + * ratio, and unlocks further simplifications with the next TODO. + * + * TODO: Try moving the repcode search into searchMax. After the re-winding + * and repcode search are in searchMax, there is no more logic in the match + * finder loop that requires knowledge about the dictMode. So we should be + * able to avoid force inlining it, and we can join the extDict loop with + * the single segment loop. It should go in searchMax instead of its own + * function to avoid having multiple virtual function calls per search. 
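+ *
+ * As an illustration, GEN_ZSTD_ROW_SEARCH_FN(noDict, 4, 4) below expands to
+ * approximately:
+ *
+ *     FORCE_NOINLINE size_t ZSTD_RowFindBestMatch_noDict_4_4(
+ *             ZSTD_matchState_t* ms,
+ *             const BYTE* ip, const BYTE* const iLimit,
+ *             size_t* offsetPtr)
+ *     {
+ *         assert(MAX(4, MIN(6, ms->cParams.minMatch)) == 4);
+ *         assert(MAX(4, MIN(6, ms->cParams.searchLog)) == 4);
+ *         return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict, 4);
+ *     }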
+ */ + +#define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls +#define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls +#define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog + +#define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE + +#define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offBasePtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \ + } \ + +#define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offsetPtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \ + } \ + +#define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offsetPtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \ + return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \ + } \ + +#define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \ + X(dictMode, mls, 4) \ + X(dictMode, mls, 5) \ + X(dictMode, mls, 6) + +#define ZSTD_FOR_EACH_MLS_ROWLOG(X, dictMode) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 5) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 6) + +#define ZSTD_FOR_EACH_MLS(X, dictMode) \ + X(dictMode, 4) \ + X(dictMode, 5) \ + X(dictMode, 6) + +#define ZSTD_FOR_EACH_DICT_MODE(X, ...) 
\ + X(__VA_ARGS__, noDict) \ + X(__VA_ARGS__, extDict) \ + X(__VA_ARGS__, dictMatchState) \ + X(__VA_ARGS__, dedicatedDictSearch) + +/* Generate row search fns for each combination of (dictMode, mls, rowLog) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN) +/* Generate binary Tree search fns for each combination of (dictMode, mls) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN) +/* Generate hash chain search fns for each combination of (dictMode, mls) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN) + +typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; + +#define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \ + case mls: \ + return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr); +#define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \ + case mls: \ + return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr); +#define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \ + case rowLog: \ + return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr); + +#define ZSTD_SWITCH_MLS(X, dictMode) \ + switch (mls) { \ + ZSTD_FOR_EACH_MLS(X, dictMode) \ + } + +#define ZSTD_SWITCH_ROWLOG(dictMode, mls) \ + case mls: \ + switch (rowLog) { \ + ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \ + } \ + ZSTD_UNREACHABLE; \ + break; + +#define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \ + switch (searchMethod) { \ + case search_hashChain: \ + ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \ + break; \ + case search_binaryTree: \ + ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \ + break; \ + case search_rowHash: \ + ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \ + break; \ + } \ + ZSTD_UNREACHABLE; + +/* + * Searches for the longest match at @p ip. + * Dispatches to the correct implementation function based on the + * (searchMethod, dictMode, mls, rowLog). We use switch statements + * here instead of using an indirect function call through a function + * pointer because after Spectre and Meltdown mitigations, indirect + * function calls can be very costly, especially in the kernel. + * + * NOTE: dictMode and searchMethod should be templated, so those switch + * statements should be optimized out. Only the mls & rowLog switches + * should be left. + * + * @param ms The match state. + * @param ip The position to search at. + * @param iend The end of the input data. + * @param[out] offsetPtr Stores the match offset into this pointer. + * @param mls The minimum search length, in the range [4, 6]. + * @param rowLog The row log (if applicable), in the range [4, 6]. + * @param searchMethod The search method to use (templated). + * @param dictMode The dictMode (templated). + * + * @returns The length of the longest match found, or < mls if no match is found. + * If a match is found its offset is stored in @p offsetPtr. 
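+ *
+ * Typical use from the lazy parsers below, as a sketch:
+ *
+ *     size_t offcode = 999999999;     (sentinel; overwritten when a match is found)
+ *     size_t const ml = ZSTD_searchMax(ms, ip, iend, &offcode, mls, rowLog,
+ *                                      searchMethod, dictMode);
+ *     if (ml >= 4) {
+ *         ... offcode now holds the match offset in its stored (STORE_OFFSET) form ...
+ *     }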
+ */ +FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax( + ZSTD_matchState_t* ms, + const BYTE* ip, + const BYTE* iend, + size_t* offsetPtr, + U32 const mls, + U32 const rowLog, + searchMethod_e const searchMethod, + ZSTD_dictMode_e const dictMode) +{ + if (dictMode == ZSTD_noDict) { + ZSTD_SWITCH_SEARCH_METHOD(noDict) + } else if (dictMode == ZSTD_extDict) { + ZSTD_SWITCH_SEARCH_METHOD(extDict) + } else if (dictMode == ZSTD_dictMatchState) { + ZSTD_SWITCH_SEARCH_METHOD(dictMatchState) + } else if (dictMode == ZSTD_dedicatedDictSearch) { + ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch) + } + ZSTD_UNREACHABLE; + return 0; +} + /* ******************************* * Common parser - lazy strategy *********************************/ -typedef enum { search_hashChain, search_binaryTree } searchMethod_e; FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_generic( @@ -865,41 +1484,13 @@ ZSTD_compressBlock_lazy_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; + const BYTE* const ilimit = (searchMethod == search_rowHash) ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; const BYTE* const base = ms->window.base; const U32 prefixLowestIndex = ms->window.dictLimit; const BYTE* const prefixLowest = base + prefixLowestIndex; + const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - - /* - * This table is indexed first by the four ZSTD_dictMode_e values, and then - * by the two searchMethod_e values. NULLs are placed for configurations - * that should never occur (extDict modes go to the other implementation - * below and there is no DDSS for binary tree search yet). 
- */ - const searchMax_f searchFuncs[4][2] = { - { - ZSTD_HcFindBestMatch_selectMLS, - ZSTD_BtFindBestMatch_selectMLS - }, - { - NULL, - NULL - }, - { - ZSTD_HcFindBestMatch_dictMatchState_selectMLS, - ZSTD_BtFindBestMatch_dictMatchState_selectMLS - }, - { - ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, - NULL - } - }; - - searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const int isDMS = dictMode == ZSTD_dictMatchState; @@ -915,11 +1506,7 @@ ZSTD_compressBlock_lazy_generic( 0; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); - assert(searchMax != NULL); - - DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); - - /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { U32 const curr = (U32)(ip - base); @@ -935,6 +1522,12 @@ ZSTD_compressBlock_lazy_generic( assert(offset_2 <= dictAndPrefixLength); } + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } + /* Match Loop */ #if defined(__x86_64__) /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the @@ -944,8 +1537,9 @@ ZSTD_compressBlock_lazy_generic( #endif while (ip < ilimit) { size_t matchLength=0; - size_t offset=0; + size_t offcode=STORE_REPCODE_1; const BYTE* start=ip+1; + DEBUGLOG(7, "search baseline (depth 0)"); /* check repCode */ if (isDxS) { @@ -969,9 +1563,9 @@ ZSTD_compressBlock_lazy_generic( /* first search (depth 0) */ { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, dictMode); if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + matchLength = ml2, start = ip, offcode=offsetFound; } if (matchLength < 4) { @@ -982,14 +1576,15 @@ ZSTD_compressBlock_lazy_generic( /* let's try to find a better solution */ if (depth>=1) while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } if (isDxS) { const U32 repIndex = (U32)(ip - base) - offset_1; @@ -1001,30 +1596,31 @@ ZSTD_compressBlock_lazy_generic( const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } } { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offcode = offset2, start = ip; continue; /* search a better one */ } } /* let's find an even better one */ if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } if (isDxS) { const U32 repIndex = (U32)(ip - base) - offset_1; @@ -1036,46 +1632,45 @@ ZSTD_compressBlock_lazy_generic( const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } } { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offcode = offset2, start = ip; continue; } } } break; /* nothing found : store previous solution */ } /* NOTE: - * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. - * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which - * overflows the pointer, which is undefined behavior. 
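+ * With this change offcode carries either a stored repcode or a stored
+ * offset; a sketch of the convention used below (assuming the STORE_*()/
+ * STORED_*() helpers come from zstd_compress_internal.h):
+ *
+ *     offcode = STORE_REPCODE_1;                    (repcode 1, the initial value)
+ *     offcode = STORE_OFFSET(curr - matchIndex);    (a real match offset)
+ *     if (STORED_IS_OFFSET(offcode))
+ *         offset_1 = (U32)STORED_OFFSET(offcode);   (recover the raw offset)
+ *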
+ * Pay attention that `start[-value]` can lead to strange undefined behavior + * notably if `value` is unsigned, resulting in a large positive `-value`. */ /* catch up */ - if (offset) { + if (STORED_IS_OFFSET(offcode)) { if (dictMode == ZSTD_noDict) { - while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) - && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest)) + && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } } if (isDxS) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ } - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); } /* store sequence */ _storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + { size_t const litLength = (size_t)(start - anchor); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); anchor = ip = start + matchLength; } @@ -1091,8 +1686,8 @@ ZSTD_compressBlock_lazy_generic( && (MEM_read32(repMatch) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); ip += matchLength; anchor = ip; continue; @@ -1106,8 +1701,8 @@ ZSTD_compressBlock_lazy_generic( && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { /* store sequence */ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) 
*/ @@ -1200,6 +1795,70 @@ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); } +/* Row-based matchfinder */ +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); +} FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_extDict_generic( @@ -1212,7 +1871,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; + const BYTE* const ilimit = searchMethod == search_rowHash ? 
iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; const BYTE* const base = ms->window.base; const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; @@ -1220,18 +1879,20 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictStart = dictBase + ms->window.lowLimit; const U32 windowLog = ms->cParams.windowLog; - - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); U32 offset_1 = rep[0], offset_2 = rep[1]; - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); /* init */ ip += (ip == prefixStart); + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } /* Match Loop */ #if defined(__x86_64__) @@ -1242,7 +1903,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( #endif while (ip < ilimit) { size_t matchLength=0; - size_t offset=0; + size_t offcode=STORE_REPCODE_1; const BYTE* start=ip+1; U32 curr = (U32)(ip-base); @@ -1251,7 +1912,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(curr+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1261,9 +1923,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* first search (depth 0) */ { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, ZSTD_extDict); if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + matchLength = ml2, start = ip, offcode=offsetFound; } if (matchLength < 4) { @@ -1277,29 +1939,30 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( ip ++; curr++; /* check repCode */ - if (offset) { + if (offcode) { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; + matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; } } /* search match, depth 1 */ { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offcode = offset2, start = ip; continue; /* search a better one */ } } @@ -1308,47 +1971,48 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( ip ++; curr++; /* check repCode */ - if (offset) { + if (offcode) { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; int const gain2 = (int)(repLength * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; + matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; } } /* search match, depth 2 */ { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offcode = offset2, start = ip; continue; } } } break; /* nothing found : store previous solution */ } /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + if (STORED_IS_OFFSET(offcode)) { + U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); } /* store sequence */ _storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + { size_t const litLength = (size_t)(start - anchor); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); anchor = ip = start + matchLength; } @@ -1359,13 +2023,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = repCurrent - offset_2; const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) 
*/ @@ -1412,3 +2077,26 @@ size_t ZSTD_compressBlock_btlazy2_extDict( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } + +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); +} diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h index 2fc5a61821344..e5bdf4df8dde0 100644 --- a/lib/zstd/compress/zstd_lazy.h +++ b/lib/zstd/compress/zstd_lazy.h @@ -23,6 +23,7 @@ #define ZSTD_LAZY_DDSS_BUCKET_LOG 2 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); @@ -40,6 +41,15 @@ size_t ZSTD_compressBlock_lazy( size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -53,6 +63,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState( size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -63,6 +82,15 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); 
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -73,9 +101,19 @@ size_t ZSTD_compressBlock_lazy_extDict( size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); + #endif /* ZSTD_LAZY_H */ diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c index 8ef7e88a5adde..dd86fc83e7dde 100644 --- a/lib/zstd/compress/zstd_ldm.c +++ b/lib/zstd/compress/zstd_ldm.c @@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* } } +/* ZSTD_ldm_gear_reset() + * Feeds [data, data + minMatchLength) into the hash without registering any + * splits. This effectively resets the hash state. This is used when skipping + * over data, either at the beginning of a block, or skipping sections. + */ +static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, + BYTE const* data, size_t minMatchLength) +{ + U64 hash = state->rolling; + size_t n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + } while (0) + while (n + 3 < minMatchLength) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < minMatchLength) { + GEAR_ITER_ONCE(); + } +#undef GEAR_ITER_ONCE +} + /* ZSTD_ldm_gear_feed(): * * Registers in the splits array all the split points found in the first @@ -132,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params) size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); - return params.enableLdm ? totalSize : 0; + return params.enableLdm == ZSTD_ps_enable ? totalSize : 0; } size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) { - return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; + return params.enableLdm == ZSTD_ps_enable ? 
(maxChunkSize / params.minMatchLength) : 0; } /* ZSTD_ldm_getBucket() : @@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable( while (ip < iend) { size_t hashed; unsigned n; - + numSplits = 0; hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); @@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal( /* Initialize the rolling hash state with the first minMatchLength bytes */ ZSTD_ldm_gear_init(&hashState, params); - { - size_t n = 0; - - while (n < minMatchLength) { - numSplits = 0; - n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, - splits, &numSplits); - } - ip += minMatchLength; - } + ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); + ip += minMatchLength; while (ip < ilimit) { size_t hashed; @@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal( for (n = 0; n < numSplits; n++) { size_t forwardMatchLength = 0, backwardMatchLength = 0, bestMatchLength = 0, mLength; + U32 offset; BYTE const* const split = candidates[n].split; U32 const checksum = candidates[n].checksum; U32 const hash = candidates[n].hash; @@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal( } /* Match found */ + offset = (U32)(split - base) - bestEntry->offset; mLength = forwardMatchLength + backwardMatchLength; { - U32 const offset = (U32)(split - base) - bestEntry->offset; rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; /* Out of sequence storage */ @@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal( ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); anchor = split + forwardMatchLength; + + /* If we find a match that ends after the data that we've hashed + * then we have a repeating, overlapping, pattern. E.g. all zeros. + * If one repetition of the pattern matches our `stopMask` then all + * repetitions will. We don't need to insert them all into out table, + * only the first one. So skip over overlapping matches. + * This is a major speed boost (20x) for compressing a single byte + * repeated, when that byte ends up in the table. + */ + if (anchor > ip + hashed) { + ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); + /* Continue the outer loop at anchor (ip + hashed == anchor). */ + ip = anchor - hashed; + break; + } } ip += hashed; @@ -500,7 +535,7 @@ size_t ZSTD_ldm_generateSequences( assert(chunkStart < iend); /* 1. Perform overflow correction if necessary. 
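The gear hash driving the LDM split detection above is a one-line update: the 64-bit state shifts left by one bit per input byte and mixes in a per-byte constant, so a byte stops influencing the state after 64 positions, and ZSTD_ldm_gear_reset() is simply that feed applied to minMatchLength bytes with no split recorded. A standalone sketch of the same update, with a placeholder table (the real constants live in zstd_ldm_geartab.h):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder: the real ZSTD_ldm_gearTab is a fixed table of 256 random
     * 64-bit constants; a small xorshift fill keeps this sketch self-contained. */
    static uint64_t gear_tab[256];

    static void gear_tab_init(void)
    {
        uint64_t x = 0x9e3779b97f4a7c15ULL;
        for (int i = 0; i < 256; i++) {
            x ^= x << 13; x ^= x >> 7; x ^= x << 17;
            gear_tab[i] = x;
        }
    }

    /* Same per-byte update as GEAR_ITER_ONCE() in the hunk above: shift the
     * state left one bit, add a per-byte constant.  After 64 further bytes the
     * contribution of this byte has been shifted out entirely. */
    static uint64_t gear_feed(uint64_t hash, const uint8_t *data, size_t len)
    {
        for (size_t n = 0; n < len; n++)
            hash = (hash << 1) + gear_tab[data[n]];
        return hash;
    }

    int main(void)
    {
        const uint8_t buf[] = "abcdefgh";
        gear_tab_init();
        /* A "reset" over the first minMatchLength bytes is just this feed with
         * the split test skipped, which is all ZSTD_ldm_gear_reset() does. */
        printf("gear state: %016llx\n",
               (unsigned long long)gear_feed(0, buf, sizeof(buf) - 1));
        return 0;
    }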
*/ - if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { U32 const ldmHSize = 1U << params->hashLog; U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); @@ -544,7 +579,9 @@ size_t ZSTD_ldm_generateSequences( return 0; } -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { +void +ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) +{ while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; if (srcSize <= seq->litLength) { @@ -622,12 +659,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_paramSwitch_e useRowMatchFinder, void const* src, size_t srcSize) { const ZSTD_compressionParameters* const cParams = &ms->cParams; unsigned const minMatch = cParams->minMatch; ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); /* Input bounds */ BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; @@ -673,8 +711,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, rep[0] = sequence.offset; /* Store the sequence */ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, - sequence.offset + ZSTD_REP_MOVE, - sequence.matchLength - MINMATCH); + STORE_OFFSET(sequence.offset), + sequence.matchLength); ip += sequence.matchLength; } } diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h index 25b25270b72e2..fbc6a5e88fd7a 100644 --- a/lib/zstd/compress/zstd_ldm.h +++ b/lib/zstd/compress/zstd_ldm.h @@ -63,6 +63,7 @@ size_t ZSTD_ldm_generateSequences( */ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_paramSwitch_e useRowMatchFinder, void const* src, size_t srcSize); /* diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h index e5c24d856b0a8..647f865be2903 100644 --- a/lib/zstd/compress/zstd_ldm_geartab.h +++ b/lib/zstd/compress/zstd_ldm_geartab.h @@ -11,7 +11,10 @@ #ifndef ZSTD_LDM_GEARTAB_H #define ZSTD_LDM_GEARTAB_H -static U64 ZSTD_ldm_gearTab[256] = { +#include "../common/compiler.h" /* UNUSED_ATTR */ +#include "../common/mem.h" /* U64 */ + +static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = { 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c index dfc55e3e8119b..fd82acfda62f6 100644 --- a/lib/zstd/compress/zstd_opt.c +++ b/lib/zstd/compress/zstd_opt.c @@ -8,25 +8,12 @@ * You may select, at your option, one of the above-listed licenses. */ -/* - * Disable inlining for the optimal parser for the kernel build. - * It is unlikely to be used in the kernel, and where it is used - * latency shouldn't matter because it is very slow to begin with. - * We prefer a ~180KB binary size win over faster optimal parsing. 
- * - * TODO(https://github.com/facebook/zstd/issues/2862): - * Improve the code size of the optimal parser in general, so we - * don't need this hack for the kernel build. - */ -#define ZSTD_NO_INLINE 1 - #include "zstd_compress_internal.h" #include "hist.h" #include "zstd_opt.h" #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ -#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ #define ZSTD_MAX_PRICE (1<<30) #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ @@ -36,11 +23,11 @@ * Price functions for optimal parser ***************************************/ -#if 0 /* approximation at bit level */ +#if 0 /* approximation at bit level (for tests) */ # define BITCOST_ACCURACY 0 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) -#elif 0 /* fractional bit accuracy */ +# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy (for tests) */ # define BITCOST_ACCURACY 8 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) @@ -78,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price) static int ZSTD_compressedLiterals(optState_t const* const optPtr) { - return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; + return optPtr->literalCompressionMode != ZSTD_ps_disable; } static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) @@ -91,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) } -/* ZSTD_downscaleStat() : - * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) - * return the resulting sum of elements */ -static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) +static U32 sum_u32(const unsigned table[], size_t nbElts) +{ + size_t n; + U32 total = 0; + for (n=0; n 0 && ZSTD_FREQ_DIV+malus < 31); + DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); + assert(shift < 30); for (s=0; s> (ZSTD_FREQ_DIV+malus)); + table[s] = 1 + (table[s] >> shift); sum += table[s]; } return sum; } +/* ZSTD_scaleStats() : + * reduce all elements in table is sum too large + * return the resulting sum of elements */ +static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) +{ + U32 const prevsum = sum_u32(table, lastEltIndex+1); + U32 const factor = prevsum >> logTarget; + DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); + assert(logTarget < 30); + if (factor <= 1) return prevsum; + return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); +} + /* ZSTD_rescaleFreqs() : * if first block (detected by optPtr->litLengthSum == 0) : init statistics * take hints from dictionary if there is one - * or init from zero, using src for literals stats, or flat 1 for match symbols + * and init from zero if there is none, + * using src for literals stats, and baseline stats for sequence symbols * otherwise downscale existing stats, to be used as seed for next block. 
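The new scaling helpers above are easier to follow with numbers: ZSTD_scaleStats() shrinks a table only when its sum exceeds the 2^logTarget budget, and derives the shift from the overshoot so the sum lands back near that budget while every entry stays non-zero. A standalone sketch of the same arithmetic (illustrative only, not the kernel code):

    #include <stdio.h>

    static unsigned sum_u32(const unsigned *table, unsigned nbElts)
    {
        unsigned total = 0;
        for (unsigned n = 0; n < nbElts; n++) total += table[n];
        return total;
    }

    static unsigned highbit32(unsigned v)    /* stand-in for ZSTD_highbit32() */
    {
        unsigned bit = 0;
        while (v >>= 1) bit++;
        return bit;
    }

    /* table[s] = 1 + (table[s] >> shift): entries shrink but never reach zero,
     * so every symbol keeps a finite price when seeding the next block. */
    static unsigned downscale_stats(unsigned *table, unsigned nbElts, unsigned shift)
    {
        unsigned sum = 0;
        for (unsigned s = 0; s < nbElts; s++) {
            table[s] = 1 + (table[s] >> shift);
            sum += table[s];
        }
        return sum;
    }

    /* Shrink only when the sum overshoots 2^logTarget, choosing the shift from
     * the overshoot factor -- the same steps as ZSTD_scaleStats(). */
    static unsigned scale_stats(unsigned *table, unsigned nbElts, unsigned logTarget)
    {
        unsigned const prevsum = sum_u32(table, nbElts);
        unsigned const factor = prevsum >> logTarget;
        if (factor <= 1) return prevsum;
        return downscale_stats(table, nbElts, highbit32(factor));
    }

    int main(void)
    {
        unsigned freq[8] = { 40000, 20000, 10000, 5000, 2500, 1250, 625, 625 };
        printf("before: %u\n", sum_u32(freq, 8));         /* 80000 */
        printf("after : %u\n", scale_stats(freq, 8, 11)); /* 2506, near 2^11 */
        return 0;
    }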
*/ static void @@ -138,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, optPtr->litSum = 0; for (lit=0; lit<=MaxLit; lit++) { U32 const scaleLog = 11; /* scale to 2K */ - U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit); assert(bitCost <= scaleLog); optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; optPtr->litSum += optPtr->litFreq[lit]; @@ -186,14 +194,19 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, if (compressedLiterals) { unsigned lit = MaxLit; HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); } - { unsigned ll; - for (ll=0; ll<=MaxLL; ll++) - optPtr->litLengthFreq[ll] = 1; + { unsigned const baseLLfreqs[MaxLL+1] = { + 4, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1 + }; + ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); + optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1); } - optPtr->litLengthSum = MaxLL+1; { unsigned ml; for (ml=0; ml<=MaxML; ml++) @@ -201,21 +214,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, } optPtr->matchLengthSum = MaxML+1; - { unsigned of; - for (of=0; of<=MaxOff; of++) - optPtr->offCodeFreq[of] = 1; + { unsigned const baseOFCfreqs[MaxOff+1] = { + 6, 2, 1, 1, 2, 3, 4, 4, + 4, 3, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 + }; + ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); + optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); } - optPtr->offCodeSum = MaxOff+1; + } } else { /* new block : re-use previous statistics, scaled down */ if (compressedLiterals) - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); - optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); + optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11); + optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11); + optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11); } ZSTD_setBasePrices(optPtr, optLevel); @@ -251,7 +269,16 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, * cost of literalLength symbol */ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) { - if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + assert(litLength <= ZSTD_BLOCKSIZE_MAX); + if (optPtr->priceType == zop_predef) + return WEIGHT(litLength, optLevel); + /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX + * because it isn't representable in the zstd format. So instead just + * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block + * would be all literals. 
+ */ + if (litLength == ZSTD_BLOCKSIZE_MAX) + return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); @@ -264,15 +291,17 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP /* ZSTD_getMatchPrice() : * Provides the cost of the match part (offset + matchLength) of a sequence * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. - * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ + * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2 + * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) + */ FORCE_INLINE_TEMPLATE U32 -ZSTD_getMatchPrice(U32 const offset, +ZSTD_getMatchPrice(U32 const offcode, U32 const matchLength, const optState_t* const optPtr, int const optLevel) { U32 price; - U32 const offCode = ZSTD_highbit32(offset+1); + U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode)); U32 const mlBase = matchLength - MINMATCH; assert(matchLength >= MINMATCH); @@ -315,8 +344,8 @@ static void ZSTD_updateStats(optState_t* const optPtr, optPtr->litLengthSum++; } - /* match offset code (0-2=>repCode; 3+=>offset+2) */ - { U32 const offCode = ZSTD_highbit32(offsetCode+1); + /* offset code : expected to follow storeSeq() numeric representation */ + { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode)); assert(offCode <= MaxOff); optPtr->offCodeFreq[offCode]++; optPtr->offCodeSum++; @@ -350,7 +379,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) /* Update hashTable3 up to ip (excluded) Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, +static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, U32* nextToUpdate3, const BYTE* const ip) { @@ -376,11 +405,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, * Binary Tree search ***************************************/ /* ZSTD_insertBt1() : add one or multiple positions to tree. - * ip : assumed <= iend-8 . + * @param ip assumed <= iend-8 . + * @param target The target of ZSTD_updateTree_internal() - we are filling to this position * @return : nb of positions added */ static U32 ZSTD_insertBt1( - ZSTD_matchState_t* ms, + const ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iend, + U32 const target, U32 const mls, const int extDict) { const ZSTD_compressionParameters* const cParams = &ms->cParams; @@ -403,7 +434,10 @@ static U32 ZSTD_insertBt1( U32* smallerPtr = bt + 2*(curr&btMask); U32* largerPtr = smallerPtr + 1; U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = ms->window.lowLimit; + /* windowLow is based on target because + * we only need positions that will be in the window at the end of the tree update. 
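The extra `target` argument above only changes which window lower bound the tree fill uses, per the comment just given: positions that will already have slid out of the window by the time the fill reaches `target` never need to stay consistent. A deliberately simplified sketch of that bound (the real ZSTD_getLowestMatchIndex() also accounts for dictionary limits):

    #include <stdint.h>
    #include <stdio.h>

    /* Smallest index still reachable from `target` with a (1 << windowLog)-byte
     * window; anything below it can no longer be returned as a match once the
     * fill has advanced to `target`. */
    static uint32_t lowest_valid_index(uint32_t target, unsigned windowLog)
    {
        uint32_t const windowSize = (uint32_t)1 << windowLog;
        return target > windowSize ? target - windowSize : 0;
    }

    int main(void)
    {
        printf("%u\n", lowest_valid_index(5000000u, 20)); /* 3951424 */
        printf("%u\n", lowest_valid_index(1000u, 20));    /* whole prefix valid: 0 */
        return 0;
    }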
+ */ + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog); U32 matchEndIdx = curr+8+1; size_t bestLength = 8; U32 nbCompares = 1U << cParams->searchLog; @@ -416,6 +450,7 @@ static U32 ZSTD_insertBt1( DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + assert(curr <= target); assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = curr; /* Update Hash Table */ @@ -504,7 +539,7 @@ void ZSTD_updateTree_internal( idx, target, dictMode); while(idx < target) { - U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict); assert(idx < (U32)(idx + forward)); idx += forward; } @@ -609,7 +644,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", repCode, ll0, repOffset, repLen); bestLength = repLen; - matches[mnum].off = repCode - ll0; + matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */ matches[mnum].len = (U32)repLen; mnum++; if ( (repLen > sufficient_len) @@ -638,7 +673,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( bestLength = mlen; assert(curr > matchIndex3); assert(mnum==0); /* no prior solution */ - matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE; + matches[0].off = STORE_OFFSET(curr - matchIndex3); matches[0].len = (U32)mlen; mnum = 1; if ( (mlen > sufficient_len) | @@ -647,7 +682,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( return 1; } } } /* no dictMatchState lookup: dicts don't have a populated HC3 table */ - } + } /* if (mls == 3) */ hashTable[h] = curr; /* Update Hash Table */ @@ -672,20 +707,19 @@ U32 ZSTD_insertBtAndGetAllMatches ( if (matchLength > bestLength) { DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", - (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); assert(matchEndIdx > matchIndex); if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].off = STORE_OFFSET(curr - matchIndex); matches[mnum].len = (U32)matchLength; mnum++; if ( (matchLength > ZSTD_OPT_NUM) | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ break; /* drop, to preserve bt consistency (miss a little bit of compression) */ - } - } + } } if (match[matchLength] < ip[matchLength]) { /* match smaller than current */ @@ -721,18 +755,17 @@ U32 ZSTD_insertBtAndGetAllMatches ( if (matchLength > bestLength) { matchIndex = dictMatchIndex + dmsIndexDelta; DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", - (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].off = STORE_OFFSET(curr - matchIndex); matches[mnum].len = (U32)matchLength; mnum++; if ( (matchLength > ZSTD_OPT_NUM) | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } + } } if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ 
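The STORE_*/STORED_* helpers used throughout these hunks all describe one numeric scale, spelled out in the ZSTD_getMatchPrice() comment above: stored values 0-2 name repcodes 1-3, values of 3 and up are real offsets plus 2, and the statistics code turns a stored value into an "offbase" by adding 1 before taking its highest bit. A sketch of that convention under hypothetical helper names (the kernel macros themselves live in zstd_compress_internal.h, outside these hunks):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define REP_MOVE 2u   /* gap between repcode space and real offsets (ZSTD_REP_NUM - 1) */

    /* stored <= REP_MOVE : repcode 1..3        stored > REP_MOVE : real offset */
    static uint32_t store_repcode(uint32_t r)     { assert(r >= 1 && r <= 3); return r - 1; }
    static uint32_t store_offset(uint32_t o)      { assert(o > 0); return o + REP_MOVE; }
    static int      stored_is_offset(uint32_t s)  { return s > REP_MOVE; }
    static uint32_t stored_offset(uint32_t s)     { assert(stored_is_offset(s)); return s - REP_MOVE; }
    static uint32_t stored_to_offbase(uint32_t s) { return s + 1; }  /* what ZSTD_highbit32() sees */

    int main(void)
    {
        uint32_t const rep1  = store_repcode(1);  /* stored as 0 */
        uint32_t const off90 = store_offset(90);  /* stored as 92 */
        printf("repcode1: stored=%u offbase=%u\n", rep1, stored_to_offbase(rep1));
        printf("offset90: stored=%u is_offset=%d offset=%u offbase=%u\n",
               off90, stored_is_offset(off90), stored_offset(off90),
               stored_to_offbase(off90));   /* 92 1 90 93 */
        return 0;
    }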
if (match[matchLength] < ip[matchLength]) { @@ -742,39 +775,91 @@ U32 ZSTD_insertBtAndGetAllMatches ( /* match is larger than current */ commonLengthLarger = matchLength; dictMatchIndex = nextPtr[0]; - } - } - } + } } } /* if (dictMode == ZSTD_dictMatchState) */ assert(matchEndIdx > curr+8); ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ return mnum; } - -FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( - ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ - ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, - const U32 rep[ZSTD_REP_NUM], - U32 const ll0, - U32 const lengthToBeat) +typedef U32 (*ZSTD_getAllMatchesFn)( + ZSTD_match_t*, + ZSTD_matchState_t*, + U32*, + const BYTE*, + const BYTE*, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat); + +FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal( + ZSTD_match_t* matches, + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, + const BYTE* const iHighLimit, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat, + const ZSTD_dictMode_e dictMode, + const U32 mls) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const matchLengthSearch = cParams->minMatch; - DEBUGLOG(8, "ZSTD_BtGetAllMatches"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); - switch(matchLengthSearch) - { - case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); - default : - case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); - case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); - case 7 : - case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); + assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls); + DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls); + if (ip < ms->window.base + ms->nextToUpdate) + return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode); + return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls); +} + +#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls + +#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \ + static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \ + ZSTD_match_t* matches, \ + ZSTD_matchState_t* ms, \ + U32* nextToUpdate3, \ + const BYTE* ip, \ + const BYTE* const iHighLimit, \ + const U32 rep[ZSTD_REP_NUM], \ + U32 const ll0, \ + U32 const lengthToBeat) \ + { \ + return ZSTD_btGetAllMatches_internal( \ + matches, ms, nextToUpdate3, ip, iHighLimit, \ + rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \ + } + +#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6) + +GEN_ZSTD_BT_GET_ALL_MATCHES(noDict) +GEN_ZSTD_BT_GET_ALL_MATCHES(extDict) +GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState) + +#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \ + { \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \ + 
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \ } + +static ZSTD_getAllMatchesFn +ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode) +{ + ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = { + ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict), + ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict), + ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState) + }; + U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6); + assert((U32)dictMode < 3); + assert(mls - 3 < 4); + return getAllMatchesFns[(int)dictMode][mls - 3]; } /* *********************** @@ -783,16 +868,18 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( /* Struct containing info needed to make decision about ldm inclusion */ typedef struct { - rawSeqStore_t seqStore; /* External match candidates store for this block */ - U32 startPosInBlock; /* Start position of the current match candidate */ - U32 endPosInBlock; /* End position of the current match candidate */ - U32 offset; /* Offset of the match candidate */ + rawSeqStore_t seqStore; /* External match candidates store for this block */ + U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ } ZSTD_optLdm_t; /* ZSTD_optLdm_skipRawSeqStoreBytes(): - * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. + * Moves forward in @rawSeqStore by @nbBytes, + * which will update the fields 'pos' and 'posInSequence'. */ -static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) +{ U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); while (currPos && rawSeqStore->pos < rawSeqStore->size) { rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; @@ -813,8 +900,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t * Calculates the beginning and end of the next match in the current block. * Updates 'pos' and 'posInSequence' of the ldmSeqStore. */ -static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, - U32 blockBytesRemaining) { +static void +ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) +{ rawSeq currSeq; U32 currBlockEndPos; U32 literalsBytesRemaining; @@ -826,8 +915,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu optLdm->endPosInBlock = UINT_MAX; return; } - /* Calculate appropriate bytes left in matchLength and litLength after adjusting - based on ldmSeqStore->posInSequence */ + /* Calculate appropriate bytes left in matchLength and litLength + * after adjusting based on ldmSeqStore->posInSequence */ currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); currBlockEndPos = currPosInBlock + blockBytesRemaining; @@ -863,15 +952,16 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu } /* ZSTD_optLdm_maybeAddMatch(): - * Adds a match if it's long enough, based on it's 'matchStartPosInBlock' - * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches' + * Adds a match if it's long enough, + * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock', + * into 'matches'. 
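The refactor above replaces a runtime switch on minMatch with a table of functions generated per (dictMode, mls) pair, so each instantiation sees compile-time constants. A toy illustration of the same generate-then-select pattern (deliberately unrelated to the zstd functions themselves):

    #include <stdio.h>

    typedef unsigned (*count_fn)(const char *s);

    /* One specialized function per constant, named by token pasting, in the
     * same spirit as GEN_ZSTD_BT_GET_ALL_MATCHES_() above. */
    #define GEN_COUNT_FN(mls)                              \
        static unsigned count_min##mls(const char *s)      \
        {                                                  \
            unsigned n = 0;                                \
            for (; *s; s++)                                \
                n += (unsigned)(*s >= '0' + (mls));        \
            return n;                                      \
        }

    GEN_COUNT_FN(3)
    GEN_COUNT_FN(4)
    GEN_COUNT_FN(5)
    GEN_COUNT_FN(6)

    static count_fn select_count_fn(unsigned mls)
    {
        static const count_fn table[4] = { count_min3, count_min4, count_min5, count_min6 };
        if (mls < 3) mls = 3;       /* same clamping idea as BOUNDED(3, mls, 6) */
        if (mls > 6) mls = 6;
        return table[mls - 3];
    }

    int main(void)
    {
        printf("%u\n", select_count_fn(4)("0123456789"));  /* digits >= '4': 6 */
        return 0;
    }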
Maintains the correct ordering of 'matches'. */ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, - ZSTD_optLdm_t* optLdm, U32 currPosInBlock) { - U32 posDiff = currPosInBlock - optLdm->startPosInBlock; + const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) +{ + U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ - U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; - U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE; + U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; /* Ensure that current block position is not outside of the match */ if (currPosInBlock < optLdm->startPosInBlock @@ -881,6 +971,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, } if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + U32 const candidateOffCode = STORE_OFFSET(optLdm->offset); DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", candidateOffCode, candidateMatchLength, currPosInBlock); matches[*nbMatches].len = candidateMatchLength; @@ -892,8 +983,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, /* ZSTD_optLdm_processMatchCandidate(): * Wrapper function to update ldm seq store and call ldm functions as necessary. */ -static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, - U32 currPosInBlock, U32 remainingBytes) { +static void +ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, + ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) +{ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { return; } @@ -904,19 +998,19 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_ * at the end of a match from the ldm seq store, and will often be some bytes * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" */ - U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock; ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); - } + } ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); } ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); } + /*-******************************* * Optimal parser *********************************/ - static U32 ZSTD_totalLen(ZSTD_optimal_t sol) { return sol.litlen + sol.mlen; @@ -957,6 +1051,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, const BYTE* const prefixStart = base + ms->window.dictLimit; const ZSTD_compressionParameters* const cParams = &ms->cParams; + ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode); + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); U32 const minMatch = (cParams->minMatch == 3) ? 
3 : 4; U32 nextToUpdate3 = ms->nextToUpdate; @@ -984,7 +1080,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* find first match */ { U32 const litlen = (U32)(ip - anchor); U32 const ll0 = !litlen; - U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, (U32)(ip-istart), (U32)(iend - ip)); if (!nbMatches) { ip++; continue; } @@ -998,18 +1094,18 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, * in every price. We include the literal length to avoid negative * prices when we subtract the previous literal length. */ - opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); + opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); /* large match -> immediate encoding */ { U32 const maxML = matches[nbMatches-1].len; - U32 const maxOffset = matches[nbMatches-1].off; + U32 const maxOffcode = matches[nbMatches-1].off; DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", - nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); + nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart)); if (maxML > sufficient_len) { lastSequence.litlen = litlen; lastSequence.mlen = maxML; - lastSequence.off = maxOffset; + lastSequence.off = maxOffcode; DEBUGLOG(6, "large match (%u>%u), immediate encoding", maxML, sufficient_len); cur = 0; @@ -1018,24 +1114,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } } /* set prices for first matches starting position == 0 */ - { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + assert(opt[0].price >= 0); + { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); U32 pos; U32 matchNb; for (pos = 1; pos < minMatch; pos++) { opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ } for (matchNb = 0; matchNb < nbMatches; matchNb++) { - U32 const offset = matches[matchNb].off; + U32 const offcode = matches[matchNb].off; U32 const end = matches[matchNb].len; for ( ; pos <= end ; pos++ ) { - U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel); U32 const sequencePrice = literalsPrice + matchPrice; DEBUGLOG(7, "rPos:%u => set initial price : %.2f", pos, ZSTD_fCost(sequencePrice)); opt[pos].mlen = pos; - opt[pos].off = offset; + opt[pos].off = offcode; opt[pos].litlen = litlen; - opt[pos].price = sequencePrice; + opt[pos].price = (int)sequencePrice; } } last_pos = pos-1; } @@ -1050,9 +1147,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* Fix current position with one literal if cheaper */ { U32 const litlen = (opt[cur-1].mlen == 0) ? 
opt[cur-1].litlen + 1 : 1; int const price = opt[cur-1].price - + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) - + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) - - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); assert(price < 1000000000); /* overflow check */ if (price <= opt[cur].price) { DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", @@ -1078,7 +1175,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, assert(cur >= opt[cur].mlen); if (opt[cur].mlen != 0) { U32 const prev = cur - opt[cur].mlen; - repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); } else { ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); @@ -1095,11 +1192,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ } + assert(opt[cur].price >= 0); { U32 const ll0 = (opt[cur].mlen != 0); U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; - U32 const previousPrice = opt[cur].price; + U32 const previousPrice = (U32)opt[cur].price; U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); U32 matchNb; ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, @@ -1137,7 +1235,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ U32 const pos = cur + mlen; - int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); if ((pos > last_pos) || (price < opt[pos].price)) { DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", @@ -1167,7 +1265,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, * update them while traversing the sequences. 
*/ if (lastSequence.mlen != 0) { - repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); ZSTD_memcpy(rep, &reps, sizeof(reps)); } else { ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); @@ -1211,7 +1309,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, assert(anchor + llen <= iend); ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); - ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen); anchor += advance; ip = anchor; } } @@ -1223,38 +1321,30 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, return (size_t)(iend - anchor); } +static size_t ZSTD_compressBlock_opt0( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); +} + +static size_t ZSTD_compressBlock_opt2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); +} size_t ZSTD_compressBlock_btopt( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_compressBlock_btopt"); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } -/* used in 2-pass strategy */ -static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) -{ - U32 s, sum=0; - assert(ZSTD_FREQ_DIV+bonus >= 0); - for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); - optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); -} /* ZSTD_initStats_ultra(): * make a first compression pass, just to seed stats with more accurate starting values. 
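The ZSTD_compressBlock_opt0()/opt2() wrappers added above follow a common specialization pattern: the shared body takes optLevel as an argument, and thin wrappers pin it to a literal so each copy can fold the level checks away. A minimal sketch of the pattern, using plain static inline (illustrative only, not the kernel code):

    #include <stdio.h>

    /* Shared body, parameterized on the level. */
    static inline unsigned price_generic(unsigned stat, int optLevel)
    {
        /* pretend level 2 uses a costlier, more precise estimate */
        return optLevel >= 2 ? (stat * 3u) / 2u : stat;
    }

    /* Thin wrappers pin the level to a literal: one specialized copy each. */
    static unsigned price_opt0(unsigned stat) { return price_generic(stat, 0); }
    static unsigned price_opt2(unsigned stat) { return price_generic(stat, 2); }

    int main(void)
    {
        printf("%u %u\n", price_opt0(100), price_opt2(100));  /* 100 150 */
        return 0;
    }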
@@ -1276,7 +1366,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ - ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ /* invalidate first scan from history */ ZSTD_resetSeqStore(seqStore); @@ -1285,8 +1375,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ms->window.lowLimit = ms->window.dictLimit; ms->nextToUpdate = ms->window.dictLimit; - /* re-inforce weight of collected statistics */ - ZSTD_upscaleStats(&ms->opt); } size_t ZSTD_compressBlock_btultra( @@ -1294,7 +1382,7 @@ size_t ZSTD_compressBlock_btultra( const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } size_t ZSTD_compressBlock_btultra2( @@ -1322,35 +1410,35 @@ size_t ZSTD_compressBlock_btultra2( ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); } - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); } size_t ZSTD_compressBlock_btopt_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_btultra_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_btopt_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); } size_t ZSTD_compressBlock_btultra_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); } /* note : no btultra2 variant for extDict nor dictMatchState, diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c index 5105e59ac04a8..89b269a641c7e 100644 --- a/lib/zstd/decompress/huf_decompress.c +++ b/lib/zstd/decompress/huf_decompress.c @@ -22,6 +22,13 @@ #define HUF_STATIC_LINKING_ONLY #include "../common/huf.h" #include "../common/error_private.h" +#include "../common/zstd_internal.h" + +/* ************************************************************** +* Constants +****************************************************************/ + +#define HUF_DECODER_FAST_TABLELOG 11 /* 
************************************************************** * Macros @@ -36,6 +43,26 @@ #error "Cannot force the use of the X1 and X2 decoders at the same time!" #endif +#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2 +# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE +#else +# define HUF_ASM_X86_64_BMI2_ATTRS +#endif + +#define HUF_EXTERN_C +#define HUF_ASM_DECL HUF_EXTERN_C + +#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) +# define HUF_NEED_BMI2_FUNCTION 1 +#else +# define HUF_NEED_BMI2_FUNCTION 0 +#endif + +#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) +# define HUF_NEED_DEFAULT_FUNCTION 1 +#else +# define HUF_NEED_DEFAULT_FUNCTION 0 +#endif /* ************************************************************** * Error Management @@ -65,7 +92,7 @@ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ } \ \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \ void* dst, size_t dstSize, \ const void* cSrc, size_t cSrcSize, \ const HUF_DTable* DTable) \ @@ -107,13 +134,147 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) return dtd; } +#if ZSTD_ENABLE_ASM_X86_64_BMI2 + +static size_t HUF_initDStream(BYTE const* ip) { + BYTE const lastByte = ip[7]; + size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + size_t const value = MEM_readLEST(ip) | 1; + assert(bitsConsumed <= 8); + return value << bitsConsumed; +} +typedef struct { + BYTE const* ip[4]; + BYTE* op[4]; + U64 bits[4]; + void const* dt; + BYTE const* ilimit; + BYTE* oend; + BYTE const* iend[4]; +} HUF_DecompressAsmArgs; + +/* + * Initializes args for the asm decoding loop. + * @returns 0 on success + * 1 if the fallback implementation should be used. + * Or an error code on failure. + */ +static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) +{ + void const* dt = DTable + 1; + U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; + + const BYTE* const ilimit = (const BYTE*)src + 6 + 8; + + BYTE* const oend = (BYTE*)dst + dstSize; + + /* The following condition is false on x32 platform, + * but HUF_asm is not compatible with this ABI */ + if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1; + + /* strict minimum : jump table + 1 byte per stream */ + if (srcSize < 10) + return ERROR(corruption_detected); + + /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers. + * If table log is not correct at this point, fallback to the old decoder. + * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder. + */ + if (dtLog != HUF_DECODER_FAST_TABLELOG) + return 1; + + /* Read the jump table. */ + { + const BYTE* const istart = (const BYTE*)src; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = srcSize - (length1 + length2 + length3 + 6); + args->iend[0] = istart + 6; /* jumpTable */ + args->iend[1] = args->iend[0] + length1; + args->iend[2] = args->iend[1] + length2; + args->iend[3] = args->iend[2] + length3; + + /* HUF_initDStream() requires this, and this small of an input + * won't benefit from the ASM loop anyways. + * length1 must be >= 16 so that ip[0] >= ilimit before the loop + * starts. 
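The jump-table parsing above follows the 4-stream Huffman layout: a 6-byte header carries the sizes of the first three streams as little-endian 16-bit values, and the fourth stream takes whatever remains of the section. A standalone sketch of that split (hypothetical helper names, simplified error handling):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    static size_t read_le16(const uint8_t *p)
    {
        return (size_t)p[0] | ((size_t)p[1] << 8);
    }

    /* Mirrors the stream split performed by HUF_DecompressAsmArgs_init(): three
     * explicit lengths, the fourth implied by what is left after the header. */
    static int split_streams(const uint8_t *src, size_t srcSize, size_t len[4])
    {
        if (srcSize < 10) return -1;   /* jump table + at least 1 byte per stream */
        len[0] = read_le16(src);
        len[1] = read_le16(src + 2);
        len[2] = read_le16(src + 4);
        if (len[0] + len[1] + len[2] + 6 > srcSize) return -1;
        len[3] = srcSize - 6 - len[0] - len[1] - len[2];
        return 0;
    }

    int main(void)
    {
        /* Header describing streams of 3, 4 and 5 bytes, then 18 payload bytes. */
        uint8_t buf[24] = { 3, 0, 4, 0, 5, 0 };
        size_t len[4];
        if (split_streams(buf, sizeof(buf), len) == 0)
            printf("%zu %zu %zu %zu\n", len[0], len[1], len[2], len[3]);  /* 3 4 5 6 */
        return 0;
    }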
+ */ + if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8) + return 1; + if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ + } + /* ip[] contains the position that is currently loaded into bits[]. */ + args->ip[0] = args->iend[1] - sizeof(U64); + args->ip[1] = args->iend[2] - sizeof(U64); + args->ip[2] = args->iend[3] - sizeof(U64); + args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64); + + /* op[] contains the output pointers. */ + args->op[0] = (BYTE*)dst; + args->op[1] = args->op[0] + (dstSize+3)/4; + args->op[2] = args->op[1] + (dstSize+3)/4; + args->op[3] = args->op[2] + (dstSize+3)/4; + + /* No point to call the ASM loop for tiny outputs. */ + if (args->op[3] >= oend) + return 1; + + /* bits[] is the bit container. + * It is read from the MSB down to the LSB. + * It is shifted left as it is read, and zeros are + * shifted in. After the lowest valid bit a 1 is + * set, so that CountTrailingZeros(bits[]) can be used + * to count how many bits we've consumed. + */ + args->bits[0] = HUF_initDStream(args->ip[0]); + args->bits[1] = HUF_initDStream(args->ip[1]); + args->bits[2] = HUF_initDStream(args->ip[2]); + args->bits[3] = HUF_initDStream(args->ip[3]); + + /* If ip[] >= ilimit, it is guaranteed to be safe to + * reload bits[]. It may be beyond its section, but is + * guaranteed to be valid (>= istart). + */ + args->ilimit = ilimit; + + args->oend = oend; + args->dt = dt; + + return 0; +} + +static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd) +{ + /* Validate that we haven't overwritten. */ + if (args->op[stream] > segmentEnd) + return ERROR(corruption_detected); + /* Validate that we haven't read beyond iend[]. + * Note that ip[] may be < iend[] because the MSB is + * the next bit to read, and we may have consumed 100% + * of the stream, so down to iend[i] - 8 is valid. + */ + if (args->ip[stream] < args->iend[stream] - 8) + return ERROR(corruption_detected); + + /* Construct the BIT_DStream_t. */ + bit->bitContainer = MEM_readLE64(args->ip[stream]); + bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]); + bit->start = (const char*)args->iend[0]; + bit->limitPtr = bit->start + sizeof(size_t); + bit->ptr = (const char*)args->ip[stream]; + + return 0; +} +#endif + #ifndef HUF_FORCE_DECOMPRESS_X2 /*-***************************/ /* single-symbol decoding */ /*-***************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ +typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */ /* * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at @@ -122,14 +283,44 @@ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decodi static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { U64 D4; if (MEM_isLittleEndian()) { - D4 = symbol + (nbBits << 8); - } else { D4 = (symbol << 8) + nbBits; + } else { + D4 = symbol + (nbBits << 8); } D4 *= 0x0001000100010001ULL; return D4; } +/* + * Increase the tableLog to targetTableLog and rescales the stats. + * If tableLog > targetTableLog this is a no-op. + * @returns New tableLog + */ +static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog) +{ + if (tableLog > targetTableLog) + return tableLog; + if (tableLog < targetTableLog) { + U32 const scale = targetTableLog - tableLog; + U32 s; + /* Increase the weight for all non-zero probability symbols by scale. 
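One way to see why the weight bump in HUF_rescaleStats() is harmless, under the assumption (mine, not stated in this hunk) that a weight-w symbol in a log-t single-symbol table decodes with t + 1 - w bits: adding the same scale to every non-zero weight and to the table log leaves each symbol's bit count unchanged, so the table is simply rebuilt at the larger log the fast decode loop expects. A small check of that arithmetic:

    #include <stdio.h>

    int main(void)
    {
        unsigned const tableLog = 8, targetTableLog = 11;
        unsigned const scale = targetTableLog - tableLog;
        unsigned const weights[5] = { 8, 6, 3, 1, 0 };   /* 0 = symbol absent */

        for (unsigned s = 0; s < 5; s++) {
            unsigned const w = weights[s];
            /* assumption: nbBits = tableLog + 1 - weight for present symbols */
            unsigned const bitsBefore = w ? tableLog + 1 - w : 0;
            unsigned const bitsAfter  = w ? targetTableLog + 1 - (w + scale) : 0;
            printf("weight %u: %u bits -> %u bits\n", w, bitsBefore, bitsAfter);
        }
        return 0;   /* every present symbol keeps its bit count (1, 3, 6, 8) */
    }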
*/ + for (s = 0; s < nbSymbols; ++s) { + huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale); + } + /* Update rankVal to reflect the new weights. + * All weights except 0 get moved to weight + scale. + * Weights [1, scale] are empty. + */ + for (s = targetTableLog; s > scale; --s) { + rankVal[s] = rankVal[s - scale]; + } + for (s = scale; s > 0; --s) { + rankVal[s] = 0; + } + } + return targetTableLog; +} + typedef struct { U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; @@ -162,8 +353,12 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); if (HUF_isError(iSize)) return iSize; + /* Table header */ { DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog + 1; + U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); + tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ dtd.tableType = 0; dtd.tableLog = (BYTE)tableLog; @@ -207,7 +402,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr /* fill DTable * We fill all entries of each weight in order. - * That way length is a constant for each iteration of the outter loop. + * That way length is a constant for each iteration of the outer loop. * We can switch based on the length to a different inner loop which is * optimized for that particular case. */ @@ -304,11 +499,15 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons BYTE* const pStart = p; /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_1(p, bitDPtr); - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + if ((pEnd - p) > 3) { + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } + } else { + BIT_reloadDStream(bitDPtr); } /* [0-3] symbols remaining */ @@ -388,33 +587,36 @@ HUF_decompress4X1_usingDTable_internal_body( U32 endSignal = 1; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - for ( ; (endSignal) & (op4 < olimit) ; ) { - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_1(op1, &bitD1); - HUF_DECODE_SYMBOLX1_1(op2, &bitD2); - HUF_DECODE_SYMBOLX1_1(op3, &bitD3); - HUF_DECODE_SYMBOLX1_1(op4, &bitD4); - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_0(op1, &bitD1); - HUF_DECODE_SYMBOLX1_0(op2, &bitD2); - 
HUF_DECODE_SYMBOLX1_0(op3, &bitD3); - HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + if ((size_t)(oend - op4) >= sizeof(size_t)) { + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } } /* check corruption */ @@ -440,6 +642,79 @@ HUF_decompress4X1_usingDTable_internal_body( } } +#if HUF_NEED_BMI2_FUNCTION +static BMI2_TARGET_ATTRIBUTE +size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} +#endif + +#if HUF_NEED_DEFAULT_FUNCTION +static +size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 + +HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; + +static HUF_ASM_X86_64_BMI2_ATTRS +size_t +HUF_decompress4X1_usingDTable_internal_bmi2_asm( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + void const* dt = DTable + 1; + const BYTE* const iend = (const BYTE*)cSrc + 6; + BYTE* const oend = (BYTE*)dst + dstSize; + HUF_DecompressAsmArgs args; + { + size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + FORWARD_IF_ERROR(ret, "Failed to init asm args"); + if (ret != 0) + return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); + } + + assert(args.ip[0] >= args.ilimit); + HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args); + + /* Our loop guarantees that ip[] >= ilimit and that we haven't + * overwritten any op[]. + */ + assert(args.ip[0] >= iend); + assert(args.ip[1] >= iend); + assert(args.ip[2] >= iend); + assert(args.ip[3] >= iend); + assert(args.op[3] <= oend); + (void)iend; + + /* finish bit streams one by one. 
*/ + { + size_t const segmentSize = (dstSize+3) / 4; + BYTE* segmentEnd = (BYTE*)dst; + int i; + for (i = 0; i < 4; ++i) { + BIT_DStream_t bit; + if (segmentSize <= (size_t)(oend - segmentEnd)) + segmentEnd += segmentSize; + else + segmentEnd = oend; + FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); + /* Decompress and validate that we've produced exactly the expected length. */ + args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG); + if (args.op[i] != segmentEnd) return ERROR(corruption_detected); + } + } + + /* decoded size */ + return dstSize; +} +#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, const void *cSrc, @@ -447,8 +722,28 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, const HUF_DTable *DTable); HUF_DGEN(HUF_decompress1X1_usingDTable_internal) -HUF_DGEN(HUF_decompress4X1_usingDTable_internal) +static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { +# if ZSTD_ENABLE_ASM_X86_64_BMI2 + return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); +# else + return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); +# endif + } +#else + (void)bmi2; +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) + return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); +#else + return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); +#endif +} size_t HUF_decompress1X1_usingDTable( @@ -518,106 +813,226 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* *************************/ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef struct { BYTE symbol; } sortedSymbol_t; typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; +/* + * Constructs a HUF_DEltX2 in a U32. + */ +static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level) +{ + U32 seq; + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0); + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2); + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3); + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32)); + if (MEM_isLittleEndian()) { + seq = level == 1 ? symbol : (baseSeq + (symbol << 8)); + return seq + (nbBits << 16) + ((U32)level << 24); + } else { + seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol); + return (seq << 16) + (nbBits << 8) + (U32)level; + } +} -/* HUF_fillDTableX2Level2() : - * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, - const U32* rankValOrigin, const int minWeight, - const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize) +/* + * Constructs a HUF_DEltX2. 
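HUF_buildDEltX2U32() above packs the four fields of a table entry into one 32-bit value so that entries can be written singly or copied in pairs via a packed U64; on a little-endian machine the layout is the 2-symbol sequence in the low 16 bits, then nbBits, then the entry length. A standalone round-trip of that packing (same formula as the little-endian branch above, otherwise hypothetical names):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Field order matches the offsetof() asserts in the hunk:
     * sequence at byte 0, nbBits at byte 2, length at byte 3. */
    typedef struct { uint16_t sequence; uint8_t nbBits; uint8_t length; } DEltX2;

    static int is_little_endian(void)
    {
        uint16_t const one = 1;
        uint8_t b;
        memcpy(&b, &one, 1);
        return b == 1;
    }

    /* Little-endian branch of the packing: two symbols into `sequence`,
     * then nbBits, then the number of symbols this entry consumes (level). */
    static uint32_t build_delt_u32_le(uint32_t symbol, uint32_t nbBits,
                                      uint32_t baseSeq, int level)
    {
        uint32_t const seq = (level == 1) ? symbol : (baseSeq + (symbol << 8));
        return seq + (nbBits << 16) + ((uint32_t)level << 24);
    }

    int main(void)
    {
        if (!is_little_endian()) {
            puts("big-endian host: the other branch of the packing applies");
            return 0;
        }
        {
            uint32_t const packed = build_delt_u32_le('b', 9, 'a', 2);
            DEltX2 d;
            memcpy(&d, &packed, sizeof(d));   /* same memcpy trick as HUF_buildDEltX2() */
            printf("sequence=0x%04x nbBits=%u length=%u\n", d.sequence, d.nbBits, d.length);
            /* -> sequence=0x6261 (bytes 'a','b'), nbBits=9, length=2 */
        }
        return 0;
    }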
+ */ +static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level) { HUF_DEltX2 DElt; - U32* rankVal = wksp; + U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); + DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val)); + ZSTD_memcpy(&DElt, &val, sizeof(val)); + return DElt; +} + +/* + * Constructs 2 HUF_DEltX2s and packs them into a U64. + */ +static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level) +{ + U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); + return (U64)DElt + ((U64)DElt << 32); +} - assert(wkspSize >= HUF_TABLELOG_MAX + 1); - (void)wkspSize; - /* get pre-calculated rankVal */ - ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); +/* + * Fills the DTable rank with all the symbols from [begin, end) that are each + * nbBits long. + * + * @param DTableRank The start of the rank in the DTable. + * @param begin The first symbol to fill (inclusive). + * @param end The last symbol to fill (exclusive). + * @param nbBits Each symbol is nbBits long. + * @param tableLog The table log. + * @param baseSeq If level == 1 { 0 } else { the first level symbol } + * @param level The level in the table. Must be 1 or 2. + */ +static void HUF_fillDTableX2ForWeight( + HUF_DEltX2* DTableRank, + sortedSymbol_t const* begin, sortedSymbol_t const* end, + U32 nbBits, U32 tableLog, + U16 baseSeq, int const level) +{ + U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */); + const sortedSymbol_t* ptr; + assert(level >= 1 && level <= 2); + switch (length) { + case 1: + for (ptr = begin; ptr != end; ++ptr) { + HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); + *DTableRank++ = DElt; + } + break; + case 2: + for (ptr = begin; ptr != end; ++ptr) { + HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); + DTableRank[0] = DElt; + DTableRank[1] = DElt; + DTableRank += 2; + } + break; + case 4: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + DTableRank += 4; + } + break; + case 8: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); + DTableRank += 8; + } + break; + default: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + HUF_DEltX2* const DTableRankEnd = DTableRank + length; + for (; DTableRank != DTableRankEnd; DTableRank += 8) { + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); + } + } + break; + } +} - /* fill skipped values */ +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits, + const U32* rankVal, const int minWeight, const int maxWeight1, + const sortedSymbol_t* sortedSymbols, U32 const* rankStart, + U32 nbBitsBaseline, U16 baseSeq) +{ + /* Fill skipped values (all positions up to 
rankVal[minWeight]). + * These are positions only get a single symbol because the combined weight + * is too large. + */ if (minWeight>1) { - U32 i, skipSize = rankVal[minWeight]; - MEM_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; + U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */); + U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1); + int const skipSize = rankVal[minWeight]; + assert(length > 1); + assert((U32)skipSize < length); + switch (length) { + case 2: + assert(skipSize == 1); + ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2)); + break; + case 4: + assert(skipSize <= 4); + ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2)); + break; + default: + { + int i; + for (i = 0; i < skipSize; i += 8) { + ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2)); + } + } + } } - /* fill DTable */ - { U32 s; for (s=0; s= 1 */ - - rankVal[weight] += length; - } } + /* Fill each of the second level symbols by weight. */ + { + int w; + for (w = minWeight; w < maxWeight1; ++w) { + int const begin = rankStart[w]; + int const end = rankStart[w+1]; + U32 const nbBits = nbBitsBaseline - w; + U32 const totalBits = nbBits + consumedBits; + HUF_fillDTableX2ForWeight( + DTable + rankVal[w], + sortedSymbols + begin, sortedSymbols + end, + totalBits, targetLog, + baseSeq, /* level */ 2); + } + } } - static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, const U32 sortedListSize, + const sortedSymbol_t* sortedList, const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline, U32* wksp, size_t wkspSize) + const U32 nbBitsBaseline) { - U32* rankVal = wksp; + U32* const rankVal = rankValOrigin[0]; const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - - assert(wkspSize >= HUF_TABLELOG_MAX + 1); - wksp += HUF_TABLELOG_MAX + 1; - wkspSize -= HUF_TABLELOG_MAX + 1; - - ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); - - /* fill DTable */ - for (s=0; s= minBits) { /* enough room for a second symbol */ - U32 sortedRank; + int w; + int const wEnd = (int)maxWeight + 1; + + /* Fill DTable in order of weight. */ + for (w = 1; w < wEnd; ++w) { + int const begin = (int)rankStart[w]; + int const end = (int)rankStart[w+1]; + U32 const nbBits = nbBitsBaseline - w; + + if (targetLog-nbBits >= minBits) { + /* Enough room for a second symbol. */ + int start = rankVal[w]; + U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */); int minWeight = nbBits + scaleLog; + int s; if (minWeight < 1) minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, - rankValOrigin[nbBits], minWeight, - sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, symbol, wksp, wkspSize); + /* Fill the DTable for every symbol of weight w. + * These symbols get at least 1 second symbol. 
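+             * (each such symbol owns length = 1 << (targetLog - nbBits) consecutive DTable
+             * cells, which HUF_fillDTableX2Level2() populates before start advances by length)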
+ */ + for (s = begin; s != end; ++s) { + HUF_fillDTableX2Level2( + DTable + start, targetLog, nbBits, + rankValOrigin[nbBits], minWeight, wEnd, + sortedList, rankStart, + nbBitsBaseline, sortedList[s].symbol); + start += length; + } } else { - HUF_DEltX2 DElt; - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { U32 const end = start + length; - U32 u; - for (u = start; u < end; u++) DTable[u] = DElt; - } } - rankVal[weight] += length; + /* Only a single symbol. */ + HUF_fillDTableX2ForWeight( + DTable + rankVal[w], + sortedList + begin, sortedList + end, + nbBits, targetLog, + /* baseSeq */ 0, /* level */ 1); + } } } typedef struct { rankValCol_t rankVal[HUF_TABLELOG_MAX]; U32 rankStats[HUF_TABLELOG_MAX + 1]; - U32 rankStart0[HUF_TABLELOG_MAX + 2]; + U32 rankStart0[HUF_TABLELOG_MAX + 3]; sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; @@ -627,9 +1042,16 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) { - U32 tableLog, maxW, sizeOfSort, nbSymbols; + return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog, maxW, nbSymbols; DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; + U32 maxTableLog = dtd.maxTableLog; size_t iSize; void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; @@ -647,11 +1069,12 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ - iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0); + iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2); if (HUF_isError(iSize)) return iSize; /* check result */ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG; /* find maxWeight */ for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ @@ -664,7 +1087,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, rankStart[w] = curr; } rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ - sizeOfSort = nextRankStart; + rankStart[maxW+1] = nextRankStart; } /* sort symbols by weight */ @@ -673,7 +1096,6 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, U32 const w = wksp->weightList[s]; U32 const r = rankStart[w]++; wksp->sortedSymbol[r].symbol = (BYTE)s; - wksp->sortedSymbol[r].weight = (BYTE)w; } rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ } @@ -698,10 +1120,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, } } } } HUF_fillDTableX2(dt, maxTableLog, - wksp->sortedSymbol, sizeOfSort, + wksp->sortedSymbol, wksp->rankStart0, wksp->rankVal, maxW, - tableLog+1, - wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32)); + tableLog+1); dtd.tableLog = (BYTE)maxTableLog; dtd.tableType = 1; @@ -714,7 +1135,7 @@ FORCE_INLINE_TEMPLATE U32 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - ZSTD_memcpy(op, dt+val, 2); + ZSTD_memcpy(op, &dt[val].sequence, 2); BIT_skipBits(DStream, dt[val].nbBits); return dt[val].length; } @@ -723,15 +1144,17 @@ FORCE_INLINE_TEMPLATE U32 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - ZSTD_memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else { + ZSTD_memcpy(op, &dt[val].sequence, 1); + if (dt[val].length==1) { + BIT_skipBits(DStream, dt[val].nbBits); + } else { if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { BIT_skipBits(DStream, dt[val].nbBits); if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); - } } + } + } return 1; } @@ -753,19 +1176,37 @@ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, BYTE* const pStart = p; /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) { + if (dtLog <= 11 && MEM_64bits()) { + /* up to 10 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) { + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + } else { + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + } + } else { + BIT_reloadDStream(bitDPtr); } /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + if ((size_t)(pEnd - p) >= 2) { + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + } if (p < pEnd) p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); @@ -799,7 +1240,6 @@ HUF_decompress1X2_usingDTable_internal_body( /* decoded size */ return dstSize; } - FORCE_INLINE_TEMPLATE size_t HUF_decompress4X2_usingDTable_internal_body( void* dst, size_t dstSize, @@ -841,57 +1281,60 @@ HUF_decompress4X2_usingDTable_internal_body( U32 const dtLog = dtd.tableLog; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* 16-32 symbols per loop (4-8 symbols per stream) */ - for ( ; (endSignal) & (op4 < olimit); ) { + if ((size_t)(oend - op4) >= sizeof(size_t)) { + for ( ; (endSignal) & (op4 < olimit); ) { #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - 
HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; #else - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = (U32)LIKELY((U32) - (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY((U32) + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); #endif + } } /* check corruption */ @@ -915,8 +1358,99 @@ HUF_decompress4X2_usingDTable_internal_body( } } +#if HUF_NEED_BMI2_FUNCTION +static BMI2_TARGET_ATTRIBUTE +size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} +#endif + +#if HUF_NEED_DEFAULT_FUNCTION +static +size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X2_usingDTable_internal_body(dst, 
dstSize, cSrc, cSrcSize, DTable); +} +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 + +HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; + +static HUF_ASM_X86_64_BMI2_ATTRS size_t +HUF_decompress4X2_usingDTable_internal_bmi2_asm( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { + void const* dt = DTable + 1; + const BYTE* const iend = (const BYTE*)cSrc + 6; + BYTE* const oend = (BYTE*)dst + dstSize; + HUF_DecompressAsmArgs args; + { + size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + FORWARD_IF_ERROR(ret, "Failed to init asm args"); + if (ret != 0) + return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); + } + + assert(args.ip[0] >= args.ilimit); + HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args); + + /* note : op4 already verified within main loop */ + assert(args.ip[0] >= iend); + assert(args.ip[1] >= iend); + assert(args.ip[2] >= iend); + assert(args.ip[3] >= iend); + assert(args.op[3] <= oend); + (void)iend; + + /* finish bitStreams one by one */ + { + size_t const segmentSize = (dstSize+3) / 4; + BYTE* segmentEnd = (BYTE*)dst; + int i; + for (i = 0; i < 4; ++i) { + BIT_DStream_t bit; + if (segmentSize <= (size_t)(oend - segmentEnd)) + segmentEnd += segmentSize; + else + segmentEnd = oend; + FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); + args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG); + if (args.op[i] != segmentEnd) + return ERROR(corruption_detected); + } + } + + /* decoded size */ + return dstSize; +} +#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ + +static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { +# if ZSTD_ENABLE_ASM_X86_64_BMI2 + return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); +# else + return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); +# endif + } +#else + (void)bmi2; +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) + return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); +#else + return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); +#endif +} + HUF_DGEN(HUF_decompress1X2_usingDTable_internal) -HUF_DGEN(HUF_decompress4X2_usingDTable_internal) size_t HUF_decompress1X2_usingDTable( void* dst, size_t dstSize, @@ -1025,25 +1559,25 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] = { /* single, double, quad */ - {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ - {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ - {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ - {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ - {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ - {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ - {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ - {{ 897,128}, {1515, 75}, {2622, 68}}, 
/* Q == 7 : 44-50% */ - {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ - {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ - {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ - {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ - {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ - {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ - {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ - {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ + {{0,0}, {1,1}}, /* Q==0 : impossible */ + {{0,0}, {1,1}}, /* Q==1 : impossible */ + {{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */ + {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */ + {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */ + {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */ + {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */ + {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */ + {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */ + {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */ + {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */ + {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */ + {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */ + {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */ + {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */ + {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */ }; #endif @@ -1070,7 +1604,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) U32 const D256 = (U32)(dstSize >> 8); U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */ return DTime1 < DTime0; } #endif diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c index 6928e85f9d193..b9b935a9f5c0d 100644 --- a/lib/zstd/decompress/zstd_decompress.c +++ b/lib/zstd/decompress/zstd_decompress.c @@ -53,7 +53,6 @@ * Dependencies *********************************************************/ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ -#include "../common/cpu.h" /* bmi2 */ #include "../common/mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY #include "../common/fse.h" @@ -252,11 +251,11 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->inBuffSize = 0; dctx->outBuffSize = 0; dctx->streamStage = zdss_init; - dctx->legacyContext = NULL; - dctx->previousLegacyVersion = 0; dctx->noForwardProgress = 0; dctx->oversizedDuration = 0; - dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); +#if DYNAMIC_BMI2 + dctx->bmi2 = ZSTD_cpuSupportsBmi2(); +#endif dctx->ddictSet = NULL; ZSTD_DCtx_resetParameters(dctx); #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION @@ -277,8 +276,7 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) return dctx; } -ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) -{ +static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) { if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); @@ -289,10 +287,15 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) } } +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_internal(customMem); +} + ZSTD_DCtx* ZSTD_createDCtx(void) { DEBUGLOG(3, "ZSTD_createDCtx"); - return 
ZSTD_createDCtx_advanced(ZSTD_defaultCMem); + return ZSTD_createDCtx_internal(ZSTD_defaultCMem); } static void ZSTD_clearDict(ZSTD_DCtx* dctx) @@ -370,6 +373,19 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) return 0; } +/*! ZSTD_isSkippableFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + */ +unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } + return 0; +} + /* ZSTD_frameHeaderSize_internal() : * srcSize must be large enough to reach header size fields. * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. @@ -497,7 +513,6 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); } - /* ZSTD_getFrameContentSize() : * compatible with legacy mode * @return : decompressed size of the single frame pointed to be `src` if known, otherwise @@ -532,6 +547,37 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) } } +/*! ZSTD_readSkippableFrame() : + * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, + * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested + * in the magicVariant. + * + * Returns an error if destination buffer is not large enough, or if the frame is not skippable. + * + * @return : number of bytes written or a ZSTD error. 
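+ *
+ * A rough caller-side sketch (buffer names here are illustrative, not part of the API) :
+ *   unsigned variant;
+ *   size_t const n = ZSTD_readSkippableFrame(payload, payloadCapacity, &variant, src, srcSize);
+ *   if (!ZSTD_isError(n)), the first n bytes of payload hold the frame's user data,
+ *   and the frame's magic number was ZSTD_MAGIC_SKIPPABLE_START + variant.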
+ */ +ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, + const void* src, size_t srcSize) +{ + U32 const magicNumber = MEM_readLE32(src); + size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); + size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; + + /* check input validity */ + RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); + RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); + RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); + + /* deliver payload */ + if (skippableContentSize > 0 && dst != NULL) + ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); + if (magicVariant != NULL) + *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; + return skippableContentSize; +} + /* ZSTD_findDecompressedSize() : * compatible with legacy mode * `srcSize` must be the exact length of some number of ZSTD compressed and/or @@ -824,7 +870,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming); break; case bt_raw : decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); @@ -976,7 +1022,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr { #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) size_t regenSize; - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem); RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); @@ -1010,7 +1056,7 @@ static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t return dctx->expected; if (dctx->bType != bt_raw) return dctx->expected; - return MIN(MAX(inputSize, 1), dctx->expected); + return BOUNDED(1, inputSize, dctx->expected); } ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { @@ -1116,7 +1162,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { case bt_compressed: DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming); dctx->expected = 0; /* Streaming not supported */ break; case bt_raw : @@ -1438,7 +1484,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, ZSTD_DStream* ZSTD_createDStream(void) { DEBUGLOG(3, "ZSTD_createDStream"); - return ZSTD_createDStream_advanced(ZSTD_defaultCMem); + return ZSTD_createDCtx_internal(ZSTD_defaultCMem); } ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) @@ -1448,7 +1494,7 @@ ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) { - return ZSTD_createDCtx_advanced(customMem); + return ZSTD_createDCtx_internal(customMem); } size_t ZSTD_freeDStream(ZSTD_DStream* zds) @@ -1708,7 +1754,8 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long 
long frameContentSize) { size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/ + unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2); unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, @@ -1842,7 +1889,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB DEBUGLOG(5, "stage zdss_init => transparent reset "); zds->streamStage = zdss_loadHeader; zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - zds->legacyVersion = 0; zds->hostageByte = 0; zds->expectedOutBuffer = *output; ZSTD_FALLTHROUGH; diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c index 2d101d9a842ec..c1913b8e7c897 100644 --- a/lib/zstd/decompress/zstd_decompress_block.c +++ b/lib/zstd/decompress/zstd_decompress_block.c @@ -69,15 +69,56 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, } } +/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */ +static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, + const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) +{ + if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) + { + /* room for litbuffer to fit without read faulting */ + dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_in_dst; + } + else if (litSize > ZSTD_LITBUFFEREXTRASIZE) + { + /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ + if (splitImmediately) { + /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ + dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; + dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; + } + else { + /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */ + dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; + dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; + } + dctx->litBufferLocation = ZSTD_split; + } + else + { + /* fits entirely within litExtraBuffer, so no split is necessary */ + dctx->litBuffer = dctx->litExtraBuffer; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; + } +} /* Hidden declaration for fullbench */ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize); + const void* src, size_t srcSize, + void* dst, size_t dstCapacity, const streaming_operation streaming); /*! ZSTD_decodeLiteralsBlock() : + * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored + * in the dstBuffer. 
If there is room to do so, it will be stored in full in the excess dst space after where the current + * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being + * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write. + * * @return : nb of bytes read from src (< srcSize ) * note : symbol not declared but exposed for fullbench */ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ + const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ + void* dst, size_t dstCapacity, const streaming_operation streaming) { DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); @@ -99,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhc = MEM_readLE32(istart); size_t hufSuccess; + size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); switch(lhlCode) { case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -121,8 +163,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); break; } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); /* prefetch huffman table if cold */ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { @@ -133,11 +178,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, if (singleStream) { hufSuccess = HUF_decompress1X_usingDTable_bmi2( dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); + dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); } else { hufSuccess = HUF_decompress4X_usingDTable_bmi2( dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); + dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); } } else { if (singleStream) { @@ -150,15 +195,22 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); + sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); #endif } else { hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); + sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); } } + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); + dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; + dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; + } RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); @@ -166,13 +218,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, dctx->litSize = litSize; dctx->litEntropy = 1; if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return litCSize + lhSize; } case 
set_basic: { size_t litSize, lhSize; U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); switch(lhlCode) { case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -189,23 +241,36 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); - ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); + } + else + { + ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize); + } dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; - ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return lhSize+litSize; } /* direct reference into compressed stream */ dctx->litPtr = istart+lhSize; dctx->litSize = litSize; + dctx->litBufferEnd = dctx->litPtr + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; return lhSize+litSize; } case set_rle: { U32 const lhlCode = ((istart[0]) >> 2) & 3; size_t litSize, lhSize; + size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); switch(lhlCode) { case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -222,8 +287,19 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE); + } + else + { + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize); + } dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; return lhSize+1; @@ -343,7 +419,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<nbBits = 0; cell->nextState = 0; assert(nbAddBits < 255); - cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->nbAdditionalBits = nbAddBits; cell->baseValue = baseValue; } @@ -367,7 +443,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB FORCE_INLINE_TEMPLATE void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_seqSymbol* const tableDecode = dt+1; @@ -478,7 +554,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); tableDecode[u].nextState = 
(U16) ( (nextState << tableDecode[u].nbBits) - tableSize); assert(nbAdditionalBits[symbol] < 255); - tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol]; + tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol]; tableDecode[u].baseValue = baseValue[symbol]; } } @@ -487,7 +563,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, /* Avoids the FORCE_INLINE of the _body() function. */ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, @@ -495,9 +571,9 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, } #if DYNAMIC_BMI2 -TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, +BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, @@ -507,7 +583,7 @@ TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize, int bmi2) { #if DYNAMIC_BMI2 @@ -529,7 +605,7 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr, symbolEncodingType_e type, unsigned max, U32 maxLog, const void* src, size_t srcSize, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize, int bmi2) @@ -541,7 +617,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, ""); { U32 const symbol = *(const BYTE*)src; U32 const baseline = baseValue[symbol]; - U32 const nbBits = nbAdditionalBits[symbol]; + U8 const nbBits = nbAdditionalBits[symbol]; ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); } *DTablePtr = DTableSpace; @@ -620,7 +696,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, LL_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, dctx->workspace, sizeof(dctx->workspace), - dctx->bmi2); + ZSTD_DCtx_get_bmi2(dctx)); RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -632,7 +708,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, OF_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, dctx->workspace, sizeof(dctx->workspace), - dctx->bmi2); + ZSTD_DCtx_get_bmi2(dctx)); RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -644,7 +720,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ML_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, dctx->workspace, sizeof(dctx->workspace), - dctx->bmi2); + ZSTD_DCtx_get_bmi2(dctx)); RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, 
"ZSTD_buildSeqTable failed"); ip += mlhSize; } @@ -658,7 +734,6 @@ typedef struct { size_t litLength; size_t matchLength; size_t offset; - const BYTE* match; } seq_t; typedef struct { @@ -672,9 +747,6 @@ typedef struct { ZSTD_fseState stateOffb; ZSTD_fseState stateML; size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* prefixStart; - const BYTE* dictEnd; - size_t pos; } seqState_t; /*! ZSTD_overlapCopy8() : @@ -717,7 +789,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. * The src buffer must be before the dst buffer. */ -static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { +static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { ptrdiff_t const diff = op - ip; BYTE* const oend = op + length; @@ -733,6 +805,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ assert(length >= 8); ZSTD_overlapCopy8(&op, &ip, diff); + length -= 8; assert(op - ip >= 8); assert(op <= oend); } @@ -747,8 +820,31 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ assert(oend > oend_w); ZSTD_wildcopy(op, ip, oend_w - op, ovtype); ip += oend_w - op; - op = oend_w; + op += oend_w - op; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_safecopyDstBeforeSrc(): + * This version allows overlap with dst before src, or handles the non-overlap case with dst after src + * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ +static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + if (length < 8 || diff > -8) { + /* Handle short lengths, close overlaps, and dst not before src. */ + while (op < oend) *op++ = *ip++; + return; + } + + if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) { + ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap); + ip += oend - WILDCOPY_OVERLENGTH - op; + op += oend - WILDCOPY_OVERLENGTH - op; } + /* Handle the leftovers. 
*/ while (op < oend) *op++ = *ip++; } @@ -763,9 +859,9 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ */ FORCE_NOINLINE size_t ZSTD_execSequenceEnd(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; @@ -788,27 +884,76 @@ size_t ZSTD_execSequenceEnd(BYTE* op, if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); - match = dictEnd - (prefixStart-match); + match = dictEnd - (prefixStart - match); if (match + sequence.matchLength <= dictEnd) { ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - ZSTD_memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - } } + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +/* ZSTD_execSequenceEndSplitLitBuffer(): + * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. 
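+ * (with the literal buffer sitting in the tail of dst, the output cursor can catch up
+ * to it, hence the explicit overlap check and the ZSTD_safecopyDstBeforeSrc() literal
+ * copy in the body below)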
+ */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, + BYTE* const oend, const BYTE* const oend_w, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer"); + ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart - match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); return sequenceLength; } HINT_INLINE size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; @@ -817,6 +962,98 @@ size_t ZSTD_execSequence(BYTE* op, const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength 
<= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. 
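+     * (ZSTD_overlapCopy8() above has already spread the offset to >= 8, so the remaining
+     * matchLength - 8 bytes can be copied with an overlapping wildcopy in 8-byte strides)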
*/ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, + BYTE* const oend, const BYTE* const oend_w, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + assert(op != NULL /* Precondition */); assert(oend_w < oend /* No underflow */); /* Handle edge cases in a slow path: @@ -828,7 +1065,7 @@ size_t ZSTD_execSequence(BYTE* op, iLitEnd > litLimit || oMatchEnd > oend_w || (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) - return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ assert(op <= oLitEnd /* No overflow */); @@ -896,6 +1133,7 @@ size_t ZSTD_execSequence(BYTE* op, return sequenceLength; } + static void ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) { @@ -909,20 +1147,10 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS } FORCE_INLINE_TEMPLATE void -ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) -{ - ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; -} - -FORCE_INLINE_TEMPLATE void -ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits) { - U32 const nbBits = DInfo.nbBits; size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; + DStatePtr->state = nextState + lowBits; } /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum @@ -936,116 +1164,105 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD : 0) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { seq_t seq; - ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; - ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; - ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; - U32 const llBase = llDInfo.baseValue; - U32 const mlBase = mlDInfo.baseValue; - U32 const ofBase = ofDInfo.baseValue; - BYTE const llBits = llDInfo.nbAdditionalBits; - BYTE const mlBits = mlDInfo.nbAdditionalBits; - BYTE const ofBits = ofDInfo.nbAdditionalBits; - BYTE 
const totalBits = llBits+mlBits+ofBits; - - /* sequence */ - { size_t offset; - if (ofBits > 1) { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { - U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } else { - U32 const ll0 = (llBase == 0); - if (LIKELY((ofBits == 0))) { - if (LIKELY(!ll0)) - offset = seqState->prevOffset[0]; - else { - offset = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; + const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; + const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; + const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; + seq.matchLength = mlDInfo->baseValue; + seq.litLength = llDInfo->baseValue; + { U32 const ofBase = ofDInfo->baseValue; + BYTE const llBits = llDInfo->nbAdditionalBits; + BYTE const mlBits = mlDInfo->nbAdditionalBits; + BYTE const ofBits = ofDInfo->nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + U16 const llNext = llDInfo->nextState; + U16 const mlNext = mlDInfo->nextState; + U16 const ofNext = ofDInfo->nextState; + U32 const llnbBits = llDInfo->nbBits; + U32 const mlnbBits = mlDInfo->nbBits; + U32 const ofnbBits = ofDInfo->nbBits; + /* + * As gcc has better branch and block analyzers, sometimes it is only + * valuable to mark likelyness for clang, it gives around 3-4% of + * performance. + */ + + /* sequence */ + { size_t offset; + #if defined(__clang__) + if (LIKELY(ofBits > 1)) { + #else + if (ofBits > 1) { + #endif + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; } else { - offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); - { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } } } - seq.offset = offset; - } - - seq.matchLength = mlBase; - if (mlBits > 0) - seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); - - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase; - if (llBits > 0) - seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); - - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - - if (prefetch == ZSTD_p_prefetch) { - size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, offset is only used for prefetching */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update - * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). - * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). - * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the - * better option, so it is the default for other compilers. But, if you - * measure that it is worse, please put up a pull request. - */ - { -#if !defined(__clang__) - const int kUseUpdateFseState = 1; -#else - const int kUseUpdateFseState = 0; -#endif - if (kUseUpdateFseState) { - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - } else { - ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + U32 const ll0 = (llDInfo->baseValue == 0); + if (LIKELY((ofBits == 0))) { + offset = seqState->prevOffset[ll0]; + seqState->prevOffset[1] = seqState->prevOffset[!ll0]; + seqState->prevOffset[0] = offset; + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; } + + #if defined(__clang__) + if (UNLIKELY(mlBits > 0)) + #else + if (mlBits > 0) + #endif + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + #if defined(__clang__) + if (UNLIKELY(llBits > 0)) + #else + if (llBits > 0) + #endif + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ } return seq; @@ -1098,9 +1315,11 @@ MEM_STATIC void ZSTD_assertValidSequence( #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + + FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE -ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, +ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, const ZSTD_longOffset_e isLongOffset, @@ -1112,17 +1331,16 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* litBufferEnd = dctx->litBufferEnd; const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences_body"); + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); (void)frame; /* Regen sequences */ if (nbSeq) { seqState_t seqState; - size_t error = 0; dctx->fseEntropy = 1; { U32 i; for (i=0; ientropy.rep[i]; } RETURN_ERROR_IF( @@ -1138,70 +1356,255 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, BIT_DStream_endOfBuffer < BIT_DStream_completed && BIT_DStream_completed < BIT_DStream_overflow); + /* decompress without overrunning litPtr begins */ + { + seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. 
You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * This issue has been reproduced on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * - Coffeelake: Intel i7-9700k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * Alignment is done for each of the three major decompression loops: + * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer + * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer + * - ZSTD_decompressSequences_body + * Alignment choices are made to minimize large swings on bad cases and influence on performance + * from changes external to this code, rather than to overoptimize on the current commit. + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. + * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ #if defined(__x86_64__) - /* Align the decompression loop to 32 + 16 bytes. - * - * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression - * speed swings based on the alignment of the decompression loop. This - * performance swing is caused by parts of the decompression loop falling - * out of the DSB. The entire decompression loop should fit in the DSB, - * when it can't we get much worse performance. You can measure if you've - * hit the good case or the bad case with this perf command for some - * compressed file test.zst: - * - * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ - * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst - * - * If you see most cycles served out of the MITE you've hit the bad case. - * If you see most cycles served out of the DSB you've hit the good case. - * If it is pretty even then you may be in an okay case. - * - * I've been able to reproduce this issue on the following CPUs: - * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 - * Use Instruments->Counters to get DSB/MITE cycles. - * I never got performance swings, but I was able to - * go from the good case of mostly DSB to half of the - * cycles served from MITE. - * - Coffeelake: Intel i9-9900k - * - * I haven't been able to reproduce the instability or DSB misses on any - * of the following CPUS: - * - Haswell - * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH - * - Skylake - * - * If you are seeing performance stability this script can help test. - * It tests on 4 commits in zstd where I saw performance change. 
- * - * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 - */ - __asm__(".p2align 5"); - __asm__("nop"); - __asm__(".p2align 4"); + __asm__(".p2align 6"); +# if __GNUC__ >= 7 + /* good for gcc-7, gcc-9, and gcc-11 */ + __asm__("nop"); + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +# if __GNUC__ == 8 || __GNUC__ == 10 + /* good for gcc-8 and gcc-10 */ + __asm__("nop"); + __asm__(".p2align 3"); +# endif +# endif +#endif + + /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ + for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { + size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + if (UNLIKELY(!--nbSeq)) + break; + BIT_reloadDStream(&(seqState.DStream)); + sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + } + + /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ + if (nbSeq > 0) { + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequence.litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + if (--nbSeq) + BIT_reloadDStream(&(seqState.DStream)); + } + } + } + + if (nbSeq > 0) /* there is remaining lit from extra buffer */ + { + +#if defined(__x86_64__) + __asm__(".p2align 6"); + __asm__("nop"); +# if __GNUC__ != 7 + /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */ + __asm__(".p2align 4"); + __asm__("nop"); + __asm__(".p2align 3"); +# elif __GNUC__ >= 11 + __asm__(".p2align 3"); +# else + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 3"); +# endif +#endif + + for (; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + if 
(UNLIKELY(!--nbSeq)) + break; + BIT_reloadDStream(&(seqState.DStream)); + } + } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ + { + size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + } + { size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart); + const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + +#if defined(__x86_64__) + __asm__(".p2align 6"); + __asm__("nop"); +# if __GNUC__ >= 7 + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 3"); +# else + __asm__(".p2align 4"); + __asm__("nop"); + __asm__(".p2align 3"); +# endif #endif + for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); #endif + if 
(UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - BIT_reloadDStream(&(seqState.DStream)); op += oneSeqSize; - /* gcc and clang both don't like early returns in this loop. - * Instead break and check for an error at the end of the loop. - */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) { - error = oneSeqSize; + if (UNLIKELY(!--nbSeq)) break; - } - if (UNLIKELY(!--nbSeq)) break; + BIT_reloadDStream(&(seqState.DStream)); } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (ZSTD_isError(error)) return error; RETURN_ERROR_IF(nbSeq, corruption_detected, ""); RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); /* save reps for next block */ @@ -1229,9 +1632,37 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, { return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } + +static size_t +ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + +FORCE_INLINE_TEMPLATE size_t +ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, + const BYTE* const prefixStart, const BYTE* const dictEnd) +{ + prefetchPos += sequence.litLength; + { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; + const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : memory address is only used for prefetching, not for dereferencing */ + PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + } + return prefetchPos + sequence.matchLength; +} + +/* This decoding function employs prefetching + * to reduce latency impact of cache misses. + * It's generally employed when block contains a significant portion of long-distance matches + * or when coupled with a "cold" dictionary */ FORCE_INLINE_TEMPLATE size_t ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, @@ -1243,10 +1674,10 @@ ZSTD_decompressSequencesLong_body( const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + maxDstSize; + BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? 
dctx->litBuffer : ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* litBufferEnd = dctx->litBufferEnd; const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); @@ -1254,18 +1685,17 @@ ZSTD_decompressSequencesLong_body( /* Regen sequences */ if (nbSeq) { -#define STORED_SEQS 4 +#define STORED_SEQS 8 #define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 +#define ADVANCED_SEQS STORED_SEQS seq_t sequences[STORED_SEQS]; int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); seqState_t seqState; int seqNb; + size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */ + dctx->fseEntropy = 1; { int i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; assert(dst != NULL); assert(iend >= ip); RETURN_ERROR_IF( @@ -1277,36 +1707,100 @@ ZSTD_decompressSequencesLong_body( /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNblitBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) + { + /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ - sequences[seqNb & STORED_SEQS_MASK] = sequence; - op += oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } + else + { + /* lit buffer is either wholly contained in first or second split, or not split at all*/ + oneSeqSize = dctx->litBufferLocation == ZSTD_split ? 
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : + ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); +#endif + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } } RETURN_ERROR_IF(seqNblitBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) + { + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequence->litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { + size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); #endif - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + } + else + { + size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? 
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : + ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); +#endif + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } /* save reps for next block */ @@ -1314,10 +1808,21 @@ ZSTD_decompressSequencesLong_body( } /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; + if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */ + { + size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + } + { size_t const lastLLSize = litBufferEnd - litPtr; RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { - ZSTD_memcpy(op, litPtr, lastLLSize); + ZSTD_memmove(op, litPtr, lastLLSize); op += lastLLSize; } } @@ -1341,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, #if DYNAMIC_BMI2 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, @@ -1351,10 +1856,20 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, { return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } +static BMI2_TARGET_ATTRIBUTE size_t +DONT_VECTORIZE +ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, @@ -1383,11 +1898,25 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, { DEBUGLOG(5, "ZSTD_decompressSequences"); #if DYNAMIC_BMI2 - if (dctx->bmi2) { + if (ZSTD_DCtx_get_bmi2(dctx)) { return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +static size_t +ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); +#if DYNAMIC_BMI2 + if (ZSTD_DCtx_get_bmi2(dctx)) { + return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, 
dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1407,7 +1936,7 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 - if (dctx->bmi2) { + if (ZSTD_DCtx_get_bmi2(dctx)) { return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif @@ -1448,7 +1977,7 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame) + const void* src, size_t srcSize, const int frame, const streaming_operation streaming) { /* blockType == blockCompressed */ const BYTE* ip = (const BYTE*)src; /* isLongOffset must be true if there are long offsets. @@ -1463,7 +1992,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Decode literals section */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); if (ZSTD_isError(litCSize)) return litCSize; ip += litCSize; @@ -1511,7 +2040,10 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + if (dctx->litBufferLocation == ZSTD_split) + return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); + else + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif } } @@ -1534,7 +2066,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, { size_t dSize; ZSTD_checkContinuity(dctx, dst, dstCapacity); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); dctx->previousDstEnd = (char*)dst + dSize; return dSize; } diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h index e7f5f6689459f..3d2d57a5d25a7 100644 --- a/lib/zstd/decompress/zstd_decompress_block.h +++ b/lib/zstd/decompress/zstd_decompress_block.h @@ -33,6 +33,12 @@ */ + /* Streaming state is used to inform allocation of the literal buffer */ +typedef enum { + not_streaming = 0, + is_streaming = 1 +} streaming_operation; + /* ZSTD_decompressBlock_internal() : * decompress block, starting at `src`, * into destination buffer `dst`. 
@@ -41,7 +47,7 @@ */ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame); + const void* src, size_t srcSize, const int frame, const streaming_operation streaming); /* ZSTD_buildFSETable() : * generate FSE decoding table for one symbol (ll, ml or off) @@ -54,7 +60,7 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, */ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, + const U32* baseValue, const U8* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize, int bmi2); diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h index 4b9052f687558..98102edb6a832 100644 --- a/lib/zstd/decompress/zstd_decompress_internal.h +++ b/lib/zstd/decompress/zstd_decompress_internal.h @@ -20,7 +20,7 @@ * Dependencies *********************************************************/ #include "../common/mem.h" /* BYTE, U16, U32 */ -#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */ +#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */ @@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; -static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { +static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -106,6 +106,22 @@ typedef struct { size_t ddictPtrCount; } ZSTD_DDictHashSet; +#ifndef ZSTD_DECODER_INTERNAL_BUFFER +# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16) +#endif + +#define ZSTD_LBMIN 64 +#define ZSTD_LBMAX (128 << 10) + +/* extra buffer, compensates when dst is not large enough to store litBuffer */ +#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX) + +typedef enum { + ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */ + ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */ + ZSTD_split = 2 /* Split between litExtraBuffer and dst */ +} ZSTD_litLocation_e; + struct ZSTD_DCtx_s { const ZSTD_seqSymbol* LLTptr; @@ -136,7 +152,9 @@ struct ZSTD_DCtx_s size_t litSize; size_t rleSize; size_t staticSize; +#if DYNAMIC_BMI2 != 0 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. 
*/ +#endif /* dictionary */ ZSTD_DDict* ddictLocal; @@ -158,16 +176,16 @@ struct ZSTD_DCtx_s size_t outStart; size_t outEnd; size_t lhSize; - void* legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; U32 hostageByte; int noForwardProgress; ZSTD_bufferMode_e outBufferMode; ZSTD_outBuffer expectedOutBuffer; /* workspace */ - BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE* litBuffer; + const BYTE* litBufferEnd; + ZSTD_litLocation_e litBufferLocation; + BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */ BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; size_t oversizedDuration; @@ -180,6 +198,14 @@ struct ZSTD_DCtx_s /* Tracing */ }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ +MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { +#if DYNAMIC_BMI2 != 0 + return dctx->bmi2; +#else + (void)dctx; + return 0; +#endif +} /*-******************************************************* * Shared internal functions diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h index 0fbec508f285e..a06ca187aab5f 100644 --- a/lib/zstd/decompress_sources.h +++ b/lib/zstd/decompress_sources.h @@ -16,6 +16,12 @@ * decompression. */ +/* + * Disable the ASM Huffman implementation because we need to + * include all the sources. + */ +#define ZSTD_DISABLE_ASM 1 + #include "common/debug.c" #include "common/entropy_common.c" #include "common/error_private.c" diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c index 65548a4bb9341..04e1b5c01d9b6 100644 --- a/lib/zstd/zstd_compress_module.c +++ b/lib/zstd/zstd_compress_module.c @@ -133,7 +133,11 @@ EXPORT_SYMBOL(zstd_init_cstream); size_t zstd_reset_cstream(zstd_cstream *cstream, unsigned long long pledged_src_size) { - return ZSTD_resetCStream(cstream, pledged_src_size); + if (pledged_src_size == 0) + pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN; + ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only) ); + ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_setPledgedSrcSize(cstream, pledged_src_size) ); + return 0; } EXPORT_SYMBOL(zstd_reset_cstream); -- GitLab From b39301ee1f268c89bd2a8eae257b7d2f50308598 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:07 +0100 Subject: [PATCH 049/694] soundwire: qcom: remove unused SWRM_SPECIAL_CMD_ID Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221026110210.6575-4-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index b33d5db494a54..185d75ee81ee6 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -104,7 +104,6 @@ #define SWRM_REG_VAL_PACK(data, dev, id, reg) \ ((reg) | ((id) << 16) | ((dev) << 20) | ((data) << 24)) -#define SWRM_SPECIAL_CMD_ID 0xF #define MAX_FREQ_NUM 1 #define TIMEOUT_MS 100 #define QCOM_SWRM_MAX_RD_LEN 0x1 -- GitLab From 1cdbfd4c9dc95d9b1e6bcbeba71cfdc70732b50e Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:08 +0100 Subject: [PATCH 050/694] soundwire: qcom: make reset optional for v1.6 controller On some Qualcomm SoCs, such as the sc8280xp, which use the v1.6 SoundWire controller, the reset is not mandatory, so make it optional.
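As a minimal sketch of the pattern the change below relies on (the wrapper function is illustrative only; the reset-control calls are the real kernel API): the optional variant returns NULL instead of an error when the firmware node describes no "swr_audio_cgcr" line, and the reset core treats a NULL handle as a no-op, so platforms without the reset keep working unchanged.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/reset.h>

static int example_get_audio_cgcr(struct device *dev, struct reset_control **out)
{
	struct reset_control *cgcr;

	/* Returns NULL (not an error) when no "swr_audio_cgcr" reset is described. */
	cgcr = devm_reset_control_get_optional_exclusive(dev, "swr_audio_cgcr");
	if (IS_ERR(cgcr))
		return PTR_ERR(cgcr);	/* real failures, e.g. -EPROBE_DEFER */

	*out = cgcr;	/* reset_control_reset(NULL) is a no-op, so callers need no extra check */
	return 0;
}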
Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221026110210.6575-5-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 185d75ee81ee6..49e6d745e0fa5 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1321,8 +1321,8 @@ static int qcom_swrm_probe(struct platform_device *pdev) } if (data->sw_clk_gate_required) { - ctrl->audio_cgcr = devm_reset_control_get_exclusive(dev, "swr_audio_cgcr"); - if (IS_ERR_OR_NULL(ctrl->audio_cgcr)) { + ctrl->audio_cgcr = devm_reset_control_get_optional_exclusive(dev, "swr_audio_cgcr"); + if (IS_ERR(ctrl->audio_cgcr)) { dev_err(dev, "Failed to get cgcr reset ctrl required for SW gating\n"); ret = PTR_ERR(ctrl->audio_cgcr); goto err_init; -- GitLab From df73f66c7dd4474a05e07f911427043bc32cff31 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:09 +0100 Subject: [PATCH 051/694] dt-bindings: soundwire: qcom: add v1.7.0 support Signed-off-by: Srinivas Kandagatla Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221026110210.6575-6-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/soundwire/qcom,sdw.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt index c85c25779e3fc..e0faed8dceac7 100644 --- a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt +++ b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt @@ -13,6 +13,7 @@ board specific bus parameters. "qcom,soundwire-v1.5.0" "qcom,soundwire-v1.5.1" "qcom,soundwire-v1.6.0" + "qcom,soundwire-v1.7.0" - reg: Usage: required Value type: <prop-encoded-array> -- GitLab From cf43cd33b67a291fadcd16b1ad2f435bd2e60749 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:10 +0100 Subject: [PATCH 052/694] soundwire: qcom: add support for v1.7 Soundwire Controller Add support for the v1.7 SoundWire controller, which supports Multi-EE (Execution Environment); this introduces a new register and extends a field in the BUS_CTRL register. With these updates, v1.7.0 is fully supported.
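The same version-gated sequence appears twice in the diff that follows (controller init and runtime resume); condensed here purely for illustration, with an assumed helper name, the v1.7 path selects the CPU execution environment before starting the bus clock in that EE's portion of BUS_CTRL:

static void qcom_swrm_bus_clk_start(struct qcom_swrm_ctrl *ctrl)
{
	if (ctrl->version >= 0x01070000) {
		/* v1.7+: select the CPU execution environment first */
		ctrl->reg_write(ctrl, SWRM_LINK_MANAGER_EE, SWRM_EE_CPU);
		/* then start the bus clock in that EE's field of BUS_CTRL */
		ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL,
				SWRM_MCP_BUS_CLK_START << SWRM_EE_CPU);
	} else {
		/* pre-v1.7 controllers: single EE, plain clock start */
		ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START);
	}
}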
Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221026110210.6575-7-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 49e6d745e0fa5..ce00a5cf980c7 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -25,6 +25,8 @@ #define SWRM_COMP_SW_RESET 0x008 #define SWRM_COMP_STATUS 0x014 +#define SWRM_LINK_MANAGER_EE 0x018 +#define SWRM_EE_CPU 1 #define SWRM_FRM_GEN_ENABLED BIT(0) #define SWRM_COMP_HW_VERSION 0x00 #define SWRM_COMP_CFG_ADDR 0x04 @@ -684,7 +686,14 @@ static int qcom_swrm_init(struct qcom_swrm_ctrl *ctrl) u32p_replace_bits(&val, SWRM_DEF_CMD_NO_PINGS, SWRM_MCP_CFG_MAX_NUM_OF_CMD_NO_PINGS_BMSK); ctrl->reg_write(ctrl, SWRM_MCP_CFG_ADDR, val); - ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START); + if (ctrl->version >= 0x01070000) { + ctrl->reg_write(ctrl, SWRM_LINK_MANAGER_EE, SWRM_EE_CPU); + ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, + SWRM_MCP_BUS_CLK_START << SWRM_EE_CPU); + } else { + ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START); + } + /* Configure number of retries of a read/write cmd */ if (ctrl->version > 0x01050001) { /* Only for versions >= 1.5.1 */ @@ -1509,7 +1518,13 @@ static int __maybe_unused swrm_runtime_resume(struct device *dev) } else { reset_control_reset(ctrl->audio_cgcr); - ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START); + if (ctrl->version >= 0x01070000) { + ctrl->reg_write(ctrl, SWRM_LINK_MANAGER_EE, SWRM_EE_CPU); + ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, + SWRM_MCP_BUS_CLK_START << SWRM_EE_CPU); + } else { + ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START); + } ctrl->reg_write(ctrl, SWRM_INTERRUPT_CLEAR, SWRM_INTERRUPT_STATUS_MASTER_CLASH_DET); @@ -1573,6 +1588,7 @@ static const struct of_device_id qcom_swrm_of_match[] = { { .compatible = "qcom,soundwire-v1.3.0", .data = &swrm_v1_3_data }, { .compatible = "qcom,soundwire-v1.5.1", .data = &swrm_v1_5_data }, { .compatible = "qcom,soundwire-v1.6.0", .data = &swrm_v1_6_data }, + { .compatible = "qcom,soundwire-v1.7.0", .data = &swrm_v1_5_data }, {/* sentinel */}, }; -- GitLab From 28d74fc36a3e667b51a437fbf6c45264a0c8f2db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:28 +0200 Subject: [PATCH 053/694] phy: qcom-qmp: drop regulator error message Regulator core already logs an error message in case requesting a regulator fails so drop the mostly redundant error message from probe. 
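For illustration only, the simplified call sites below reduce to this shape (the vreg-init helpers in these drivers wrap devm_regulator_bulk_get(), which already reports which supply could not be acquired):

	ret = qmp_combo_vreg_init(dev, cfg);
	if (ret)
		return ret;	/* regulator core has already logged the failure */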
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 3 +-- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 3 +-- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 3 +-- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 3 +-- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 3 +-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7b434e2ee640e..998c8f80ccd80 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2816,8 +2816,7 @@ static int qmp_combo_probe(struct platform_device *pdev) ret = qmp_combo_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 5fdd85a1dc3ef..45c0e2958bf63 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -869,8 +869,7 @@ static int qmp_pcie_msm8996_probe(struct platform_device *pdev) ret = qmp_pcie_msm8996_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 30838ae8f0272..dc7f8ba413b9d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2445,8 +2445,7 @@ static int qmp_pcie_probe(struct platform_device *pdev) ret = qmp_pcie_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index e28c45ab74ea2..566365fbfe1ae 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1210,8 +1210,7 @@ static int qmp_ufs_probe(struct platform_device *pdev) ret = qmp_ufs_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index b0b13fb6cb59e..a0b97fd5d0a53 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2746,8 +2746,7 @@ static int qmp_usb_probe(struct platform_device *pdev) ret = qmp_usb_vreg_init(dev, cfg); if (ret) - return dev_err_probe(dev, ret, - "failed to get regulator supplies\n"); + return ret; num = of_get_available_child_count(dev->of_node); /* do we have a rogue child node ? */ -- GitLab From 17302d3630030db09947241451f3d984bc0d3144 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:29 +0200 Subject: [PATCH 054/694] phy: qcom-qmp: drop superfluous comments Drop some unnecessary or incorrect comments. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ---- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 3 --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 3 --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 3 --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 5 ----- 5 files changed, 18 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 998c8f80ccd80..3889dcf73c599 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1949,7 +1949,6 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) return 0; } - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -2779,7 +2778,6 @@ static int qmp_combo_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ combo_cfg = of_device_get_match_data(dev); if (!combo_cfg) return -EINVAL; @@ -2787,7 +2785,6 @@ static int qmp_combo_probe(struct platform_device *pdev) usb_cfg = combo_cfg->usb_cfg; cfg = usb_cfg; /* Setup clks and regulators */ - /* per PHY serdes; usually located at base address */ usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); @@ -2796,7 +2793,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (IS_ERR(qmp->dp_com)) return PTR_ERR(qmp->dp_com); - /* Only two serdes for combo PHY */ dp_serdes = devm_platform_ioremap_resource(pdev, 2); if (IS_ERR(dp_serdes)) return PTR_ERR(dp_serdes); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 45c0e2958bf63..8b74948eb467d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -420,7 +420,6 @@ static int qmp_pcie_msm8996_com_init(struct qmp_phy *qphy) return 0; } - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -845,12 +844,10 @@ static int qmp_pcie_msm8996_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ cfg = of_device_get_match_data(dev); if (!cfg) return -EINVAL; - /* per PHY serdes; usually located at base address */ serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index dc7f8ba413b9d..de04d8dd53504 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1978,7 +1978,6 @@ static int qmp_pcie_init(struct phy *phy) const struct qmp_phy_cfg *cfg = qphy->cfg; int ret; - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -2425,12 +2424,10 @@ static int qmp_pcie_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ cfg = of_device_get_match_data(dev); if (!cfg) return -EINVAL; - /* per PHY serdes; usually located at base address */ serdes = 
devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 566365fbfe1ae..ab69f648ee387 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -849,7 +849,6 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy) void __iomem *pcs = qphy->pcs; int ret; - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -1194,12 +1193,10 @@ static int qmp_ufs_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ cfg = of_device_get_match_data(dev); if (!cfg) return -EINVAL; - /* per PHY serdes; usually located at base address */ serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index a0b97fd5d0a53..2c5e4041bcf9d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2120,7 +2120,6 @@ static int qmp_usb_init(struct phy *phy) void __iomem *dp_com = qmp->dp_com; int ret; - /* turn on regulator supplies */ ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); if (ret) { dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret); @@ -2229,7 +2228,6 @@ static int qmp_usb_power_on(struct phy *phy) cfg->rx_tbl, cfg->rx_tbl_num, 2); } - /* Configure link rate, swing, etc. */ qmp_usb_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) @@ -2719,17 +2717,14 @@ static int qmp_usb_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - /* Get the specific init parameters of QMP phy */ cfg = of_device_get_match_data(dev); if (!cfg) return -EINVAL; - /* per PHY serdes; usually located at base address */ serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); - /* per PHY dp_com; if PHY has dp_com control block */ if (cfg->has_phy_dp_com_ctrl) { qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(qmp->dp_com)) -- GitLab From ccf6f83b1b0bbdcde1ec7c0a35dde014f7101507 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:30 +0200 Subject: [PATCH 055/694] phy: qcom-qmp-combo: drop unused in-layout configuration The QMP combo PHY driver does not use the "in-layout" configuration macro to configure registers that are typically accessed using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused feature. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 54 ++++++----------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 3889dcf73c599..84380852ba5b7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -69,11 +69,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? 
- * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -88,14 +83,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -1346,7 +1333,6 @@ static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = { }; static void qmp_combo_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -1361,19 +1347,15 @@ static void qmp_combo_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_combo_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_combo_configure_lane(base, regs, tbl, num, 0xff); + qmp_combo_configure_lane(base, tbl, num, 0xff); } static int qmp_combo_serdes_init(struct qmp_phy *qphy) @@ -1384,28 +1366,24 @@ static int qmp_combo_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_combo_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); + qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num); if (cfg->type == PHY_TYPE_DP) { switch (dp_opts->link_rate) { case 1620: - qmp_combo_configure(serdes, cfg->regs, - cfg->serdes_tbl_rbr, + qmp_combo_configure(serdes, cfg->serdes_tbl_rbr, cfg->serdes_tbl_rbr_num); break; case 2700: - qmp_combo_configure(serdes, cfg->regs, - cfg->serdes_tbl_hbr, + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr, cfg->serdes_tbl_hbr_num); break; case 5400: - qmp_combo_configure(serdes, cfg->regs, - cfg->serdes_tbl_hbr2, + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2, cfg->serdes_tbl_hbr2_num); break; case 8100: - qmp_combo_configure(serdes, cfg->regs, - cfg->serdes_tbl_hbr3, + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3, cfg->serdes_tbl_hbr3_num); break; default: @@ -2069,29 +2047,25 @@ static int qmp_combo_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_combo_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_combo_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl, - cfg->tx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); /* Configure special DP tx tunings */ if (cfg->type == PHY_TYPE_DP) cfg->configure_dp_tx(qphy); - qmp_combo_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1); + qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_combo_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl, - cfg->rx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); /* Configure link rate, swing, etc. 
*/ if (cfg->type == PHY_TYPE_DP) cfg->configure_dp_phy(qphy); else - qmp_combo_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); -- GitLab From f2175762b4ed90048b739b32a739a2df790d4e13 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:31 +0200 Subject: [PATCH 056/694] phy: qcom-qmp-pcie: drop redundant ipq8074 power on The PCS initialisation table for IPQ8074 includes updates of the reset and start-control registers which is already handled explicitly by the driver during power on. Drop the redundant register write from the IPQ8074 configuration table and along with it the now unused "in-layout" configuration macro and code. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 42 +++++------------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index de04d8dd53504..fa8bc6aeedf14 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -42,11 +42,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? - * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -61,14 +56,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -388,8 +375,6 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V2_PCS_RX_SIGDET_LVL, 0x99), QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M6DB_V0, 0x15), QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M3P5DB_V0, 0xe), - QMP_PHY_INIT_CFG_L(QPHY_SW_RESET, 0x0), - QMP_PHY_INIT_CFG_L(QPHY_START_CTRL, 0x3), }; static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_serdes_tbl[] = { @@ -1896,7 +1881,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { }; static void qmp_pcie_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -1911,30 +1895,25 @@ static void qmp_pcie_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_pcie_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_pcie_configure_lane(base, regs, tbl, num, 0xff); + qmp_pcie_configure_lane(base, tbl, num, 0xff); } static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) { - const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *serdes = qphy->serdes; if (!tables) return; - qmp_pcie_configure(serdes, cfg->regs, tables->serdes, tables->serdes_num); + qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num); } static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) @@ -1946,29 +1925,26 @@ static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const 
struct qmp_phy_cfg_t if (!tables) return; - qmp_pcie_configure_lane(tx, cfg->regs, tables->tx, tables->tx_num, 1); + qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1); if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qphy->tx2, cfg->regs, tables->tx, tables->tx_num, 2); + qmp_pcie_configure_lane(qphy->tx2, tables->tx, tables->tx_num, 2); - qmp_pcie_configure_lane(rx, cfg->regs, tables->rx, tables->rx_num, 1); + qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1); if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qphy->rx2, cfg->regs, tables->rx, tables->rx_num, 2); + qmp_pcie_configure_lane(qphy->rx2, tables->rx, tables->rx_num, 2); } static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) { - const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *pcs = qphy->pcs; void __iomem *pcs_misc = qphy->pcs_misc; if (!tables) return; - qmp_pcie_configure(pcs, cfg->regs, - tables->pcs, tables->pcs_num); - qmp_pcie_configure(pcs_misc, cfg->regs, - tables->pcs_misc, tables->pcs_misc_num); + qmp_pcie_configure(pcs, tables->pcs, tables->pcs_num); + qmp_pcie_configure(pcs_misc, tables->pcs_misc, tables->pcs_misc_num); } static int qmp_pcie_init(struct phy *phy) -- GitLab From d3ef88635e318a7cc7e2fc26a58b4e8b56c9fb9b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:32 +0200 Subject: [PATCH 057/694] phy: qcom-qmp-pcie-msm8996: drop unused in-layout configuration The MSM8996 QMP PCIe PHY driver no longer uses the "in-layout" configuration macro to configure registers that are typically accessed using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused feature. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 34 ++++--------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 8b74948eb467d..31ac405d3785a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -46,11 +46,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? 
- * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -65,14 +60,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -346,7 +333,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = { }; static void qmp_pcie_msm8996_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -361,19 +347,15 @@ static void qmp_pcie_msm8996_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_pcie_msm8996_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_pcie_msm8996_configure_lane(base, regs, tbl, num, 0xff); + qmp_pcie_msm8996_configure_lane(base, tbl, num, 0xff); } static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) @@ -387,7 +369,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) unsigned int mask, val; int ret; - qmp_pcie_msm8996_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); + qmp_pcie_msm8996_configure(serdes, serdes_tbl, serdes_tbl_num); qphy_clrbits(serdes, cfg->regs[QPHY_COM_SW_RESET], SW_RESET); qphy_setbits(serdes, cfg->regs[QPHY_COM_START_CONTROL], @@ -531,13 +513,9 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_pcie_msm8996_configure_lane(tx, cfg->regs, cfg->tx_tbl, - cfg->tx_tbl_num, 1); - - qmp_pcie_msm8996_configure_lane(rx, cfg->regs, cfg->rx_tbl, - cfg->rx_tbl_num, 1); - - qmp_pcie_msm8996_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_pcie_msm8996_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_pcie_msm8996_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); + qmp_pcie_msm8996_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); /* * Pull out PHY from POWER DOWN state. -- GitLab From 91496846a9e863f7caa2db4a828844746b6f6b32 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:33 +0200 Subject: [PATCH 058/694] phy: qcom-qmp-ufs: drop unused in-layout configuration The QMP UFS PHY driver does not use the "in-layout" configuration macro to configure registers that are typically accessed using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused feature. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 42 ++++++------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index ab69f648ee387..02931b82132f7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -41,11 +41,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? 
- * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -60,14 +55,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -800,7 +787,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { }; static void qmp_ufs_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -815,19 +801,15 @@ static void qmp_ufs_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_ufs_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_ufs_configure_lane(base, regs, tbl, num, 0xff); + qmp_ufs_configure_lane(base, tbl, num, 0xff); } static int qmp_ufs_serdes_init(struct qmp_phy *qphy) @@ -837,7 +819,7 @@ static int qmp_ufs_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_ufs_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); + qmp_ufs_configure(serdes, serdes_tbl, serdes_tbl_num); return 0; } @@ -941,21 +923,17 @@ static int qmp_ufs_power_on(struct phy *phy) qmp_ufs_serdes_init(qphy); /* Tx, Rx, and PCS configurations */ - qmp_ufs_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_ufs_configure_lane(qphy->tx2, cfg->regs, - cfg->tx_tbl, cfg->tx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_ufs_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - qmp_ufs_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1); + qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_ufs_configure_lane(qphy->rx2, cfg->regs, - cfg->rx_tbl, cfg->rx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_ufs_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); - qmp_ufs_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); ret = reset_control_deassert(qmp->ufs_reset); if (ret) -- GitLab From 9d452c3ac257d36740580e5ce2b899bfca99fd62 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:34 +0200 Subject: [PATCH 059/694] phy: qcom-qmp-usb: drop unused in-layout configuration The QMP USB PHY driver does not use the "in-layout" configuration macro to configure registers that are typically accessed using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused feature. 
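For readers skimming the series, a condensed sketch of the two addressing modes the dropped flag used to select between, taken from the removed branch (identifiers as used in these drivers, nothing new):

    if (t->in_layout)
        writel(t->val, base + regs[t->offset]);  /* offset indexes cfg->regs[] */
    else
        writel(t->val, base + t->offset);        /* offset is the raw register offset */

With QMP_PHY_INIT_CFG_L() gone, only the second form is left.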
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012081241.18273-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 42 ++++++------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 2c5e4041bcf9d..3aab9ea900783 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -69,11 +69,6 @@ struct qmp_phy_init_tbl { unsigned int offset; unsigned int val; - /* - * register part of layout ? - * if yes, then offset gives index in the reg-layout - */ - bool in_layout; /* * mask of lanes for which this register is written * for cases when second lane needs different values @@ -88,14 +83,6 @@ struct qmp_phy_init_tbl { .lane_mask = 0xff, \ } -#define QMP_PHY_INIT_CFG_L(o, v) \ - { \ - .offset = o, \ - .val = v, \ - .in_layout = true, \ - .lane_mask = 0xff, \ - } - #define QMP_PHY_INIT_CFG_LANE(o, v, l) \ { \ .offset = o, \ @@ -2069,7 +2056,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { }; static void qmp_usb_configure_lane(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num, u8 lane_mask) @@ -2084,19 +2070,15 @@ static void qmp_usb_configure_lane(void __iomem *base, if (!(t->lane_mask & lane_mask)) continue; - if (t->in_layout) - writel(t->val, base + regs[t->offset]); - else - writel(t->val, base + t->offset); + writel(t->val, base + t->offset); } } static void qmp_usb_configure(void __iomem *base, - const unsigned int *regs, const struct qmp_phy_init_tbl tbl[], int num) { - qmp_usb_configure_lane(base, regs, tbl, num, 0xff); + qmp_usb_configure_lane(base, tbl, num, 0xff); } static int qmp_usb_serdes_init(struct qmp_phy *qphy) @@ -2106,7 +2088,7 @@ static int qmp_usb_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_usb_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num); + qmp_usb_configure(serdes, serdes_tbl, serdes_tbl_num); return 0; } @@ -2214,21 +2196,17 @@ static int qmp_usb_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_usb_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_usb_configure_lane(qphy->tx2, cfg->regs, - cfg->tx_tbl, cfg->tx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_usb_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - qmp_usb_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1); + qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) { - qmp_usb_configure_lane(qphy->rx2, cfg->regs, - cfg->rx_tbl, cfg->rx_tbl_num, 2); - } + if (cfg->lanes >= 2) + qmp_usb_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); - qmp_usb_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); -- GitLab From e71906144b432135b483e228d65be59fbb44c310 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:35 +0200 Subject: [PATCH 060/694] phy: qcom-qmp-pcie: drop power-down delay config The power-down delay was included in the first version of the QMP driver as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. 
Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). Let's keep the delay for now, but drop the redundant delay period configuration while increasing the unnecessarily low timer slack somewhat. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 27 +----------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index fa8bc6aeedf14..315de484f8759 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1362,9 +1362,6 @@ struct qmp_phy_cfg { /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; - /* power_down delay in usec */ - int pwrdn_delay_min; - int pwrdn_delay_max; /* QMP PHY pipe clock interface rate */ unsigned long pipe_clock_rate; @@ -1500,8 +1497,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { @@ -1529,8 +1524,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ .pipe_clock_rate = 250000000, }; @@ -1562,8 +1555,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { @@ -1594,8 +1585,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { @@ -1624,8 +1613,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { @@ -1666,8 +1653,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { @@ -1708,8 +1693,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg msm8998_pciephy_cfg = { @@ -1765,8 +1748,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { @@ -1797,8 +1778,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .phy_status = PHYSTATUS_4_20, 
.has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { @@ -1829,8 +1808,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { @@ -1876,8 +1853,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .phy_status = PHYSTATUS_4_20, .has_pwrdn_delay = true, - .pwrdn_delay_min = 995, /* us */ - .pwrdn_delay_max = 1005, /* us */ }; static void qmp_pcie_configure_lane(void __iomem *base, @@ -2037,7 +2012,7 @@ static int qmp_pcie_power_on(struct phy *phy) qmp_pcie_pcs_init(qphy, mode_tables); if (cfg->has_pwrdn_delay) - usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); + usleep_range(1000, 1200); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); -- GitLab From 51bd33069f80705aba5f4725287bc5688ca6d92a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:36 +0200 Subject: [PATCH 061/694] phy: qcom-qmp-pcie: replace power-down delay The power-down delay was included in the first version of the QMP driver as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). But as the vendor kernel do have a 1 ms delay *after* starting the PHY and before starting to poll the status it is possible that later contributors have simply not noticed that the mainline power-down delay is not equivalent. As the current delay before even starting the PHY is pretty much pointless and likely a mistake, move the delay after starting the PHY which avoids a few iterations of polling and speeds up startup by 1 ms (the poll loop otherwise takes about 1.8 ms). Note that MSM8998 has never used a power-down delay so add a flag to skip the delay in case starting the PHY is faster on MSM8998. This can be removed after someone takes a measurement. 
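To make the reordering concrete, a condensed sketch of the resulting start path (lifted from the hunk below; unrelated lines omitted):

    /* start SerDes and Phy-Coding-Sublayer */
    qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);

    if (!cfg->skip_start_delay)
        usleep_range(1000, 1200);

    /* ... then poll QPHY_PCS_STATUS for PHYSTATUS to clear ... */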
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012081241.18273-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 33 +++++------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 315de484f8759..151baa63e8e87 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1360,8 +1360,7 @@ struct qmp_phy_cfg { /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ unsigned int phy_status; - /* true, if PHY needs delay after POWER_DOWN */ - bool has_pwrdn_delay; + bool skip_start_delay; /* QMP PHY pipe clock interface rate */ unsigned long pipe_clock_rate; @@ -1495,8 +1494,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { @@ -1523,8 +1520,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, - .has_pwrdn_delay = true, - .pipe_clock_rate = 250000000, }; @@ -1553,8 +1548,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { @@ -1583,8 +1576,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { @@ -1611,8 +1602,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { @@ -1651,8 +1640,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { @@ -1691,8 +1678,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg msm8998_pciephy_cfg = { @@ -1719,6 +1704,8 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, + + .skip_start_delay = true, }; static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { @@ -1746,8 +1733,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { @@ -1776,8 +1761,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS_4_20, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { @@ -1806,8 +1789,6 @@ static const struct 
qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, }; static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { @@ -1851,8 +1832,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS_4_20, - - .has_pwrdn_delay = true, }; static void qmp_pcie_configure_lane(void __iomem *base, @@ -2011,15 +1990,15 @@ static int qmp_pcie_power_on(struct phy *phy) qmp_pcie_pcs_init(qphy, &cfg->tables); qmp_pcie_pcs_init(qphy, mode_tables); - if (cfg->has_pwrdn_delay) - usleep_range(1000, 1200); - /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + if (!cfg->skip_start_delay) + usleep_range(1000, 1200); + status = pcs + cfg->regs[QPHY_PCS_STATUS]; mask = cfg->phy_status; ready = 0; -- GitLab From abc0841666b9ab6568229e6b9816505c987d8a59 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:37 +0200 Subject: [PATCH 062/694] phy: qcom-qmp-pcie-msm8996: drop power-down delay config The power-down delay was included in the first version of the QMP driver for MSM8996 as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). Let's keep the delay for now, but drop the redundant configuration options while increasing the unnecessarily low timer slack somewhat. 
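With the per-config fields gone, the remaining delay in qmp_pcie_msm8996_power_on() reduces to the shared constants (values as in the hunk below):

    /* POWER_DOWN_DELAY_US_MIN stays at 10; the slack ceiling grows from 11 to 20 */
    usleep_range(POWER_DOWN_DELAY_US_MIN, POWER_DOWN_DELAY_US_MAX);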
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 31ac405d3785a..899be7bd4d92d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -41,7 +41,7 @@ #define PHY_INIT_COMPLETE_TIMEOUT 10000 #define POWER_DOWN_DELAY_US_MIN 10 -#define POWER_DOWN_DELAY_US_MAX 11 +#define POWER_DOWN_DELAY_US_MAX 20 struct qmp_phy_init_tbl { unsigned int offset; @@ -203,12 +203,6 @@ struct qmp_phy_cfg { unsigned int mask_com_pcs_ready; /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ unsigned int phy_status; - - /* true, if PHY needs delay after POWER_DOWN */ - bool has_pwrdn_delay; - /* power_down delay in usec */ - int pwrdn_delay_min; - int pwrdn_delay_max; }; /** @@ -326,10 +320,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = { .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .mask_com_pcs_ready = PCS_READY, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static void qmp_pcie_msm8996_configure_lane(void __iomem *base, @@ -523,8 +513,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) */ qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl); - if (cfg->has_pwrdn_delay) - usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); + usleep_range(POWER_DOWN_DELAY_US_MIN, POWER_DOWN_DELAY_US_MAX); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); -- GitLab From d71eb7083e5eea8ddddab52e9b57a9783603a95f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:38 +0200 Subject: [PATCH 063/694] phy: qcom-qmp-combo: drop sc8280xp power-down delay The SC8280XP combo PHY does not need a delay before starting the USB PHY (which is what the has_pwrdn_delay config option really controls) so drop the unnecessary delay. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012081241.18273-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 84380852ba5b7..a8e09333072e9 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1210,10 +1210,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { -- GitLab From acfee73b635bca04e8a942a162bae0c0cd84b796 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:39 +0200 Subject: [PATCH 064/694] phy: qcom-qmp-combo: drop power-down delay config The power-down delay was included in the first version of the QMP driver as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. 
Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). Let's keep the delay for now, but drop the redundant delay period configuration while increasing the unnecessarily low timer slack somewhat. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index a8e09333072e9..82055d3a3536b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -63,8 +63,6 @@ #define CLAMP_EN BIT(0) /* enables i/o clamp_n */ #define PHY_INIT_COMPLETE_TIMEOUT 10000 -#define POWER_DOWN_DELAY_US_MIN 10 -#define POWER_DOWN_DELAY_US_MAX 11 struct qmp_phy_init_tbl { unsigned int offset; @@ -860,9 +858,6 @@ struct qmp_phy_cfg { /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; - /* power_down delay in usec */ - int pwrdn_delay_min; - int pwrdn_delay_max; /* Offset from PCS to PCS_USB region */ unsigned int pcs_usb_offset; @@ -1031,8 +1026,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sc7180_dpphy_cfg = { @@ -1102,8 +1095,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { @@ -1138,10 +1129,7 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS, - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { @@ -1282,8 +1270,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sm8250_dpphy_cfg = { @@ -2064,7 +2050,7 @@ static int qmp_combo_power_on(struct phy *phy) qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) - usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); + usleep_range(10, 20); if (cfg->type != PHY_TYPE_DP) { /* Pull PHY out of reset state */ -- GitLab From 898ab85d6b1e8f6271d180c47ef8a024dea9e357 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:40 +0200 Subject: [PATCH 065/694] phy: qcom-qmp-usb: drop sc8280xp power-down delay The SC8280XP PHY does not need a delay before starting the PHY (which is what the has_pwrdn_delay config option really controls) so drop the unnecessary delay. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012081241.18273-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 3aab9ea900783..57dda1ecefe69 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1718,10 +1718,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS, - - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { -- GitLab From 38cd167d1fc6b5bf038229b1fa02bb1f551a564f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:12:41 +0200 Subject: [PATCH 066/694] phy: qcom-qmp-usb: drop power-down delay config The power-down delay was included in the first version of the QMP driver as an optional delay after powering on the PHY (using POWER_DOWN_CONTROL) and just before starting it. Later changes modified this sequence by powering on before initialising the PHY, but the optional delay stayed where it was (i.e. before starting the PHY). The vendor driver does not use a delay before starting the PHY and this is likely not needed on any platform unless there is a corresponding delay in the vendor kernel init sequence tables (i.e. in devicetree). Let's keep the delay for now, but drop the redundant delay period configuration while increasing the unnecessarily low timer slack somewhat. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012081241.18273-15-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 35 +------------------------ 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 57dda1ecefe69..751f628710eb5 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -63,8 +63,6 @@ #define CLAMP_EN BIT(0) /* enables i/o clamp_n */ #define PHY_INIT_COMPLETE_TIMEOUT 10000 -#define POWER_DOWN_DELAY_US_MIN 10 -#define POWER_DOWN_DELAY_US_MAX 11 struct qmp_phy_init_tbl { unsigned int offset; @@ -1452,9 +1450,6 @@ struct qmp_phy_cfg { /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; - /* power_down delay in usec */ - int pwrdn_delay_min; - int pwrdn_delay_max; /* true, if PHY has a separate DP_COM control block */ bool has_phy_dp_com_ctrl; @@ -1660,9 +1655,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -1690,9 +1682,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -1744,8 +1733,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg msm8998_usb3phy_cfg = { @@ -1798,11 +1785,7 @@ 
static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS, - .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -1833,8 +1816,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { @@ -1864,9 +1845,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -1897,8 +1875,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = { @@ -1928,8 +1904,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { @@ -1959,8 +1933,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg sm8350_usb3phy_cfg = { @@ -1990,9 +1962,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, - .has_phy_dp_com_ctrl = true, }; @@ -2023,8 +1992,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .phy_status = PHYSTATUS, .has_pwrdn_delay = true, - .pwrdn_delay_min = POWER_DOWN_DELAY_US_MIN, - .pwrdn_delay_max = POWER_DOWN_DELAY_US_MAX, }; static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { @@ -2205,7 +2172,7 @@ static int qmp_usb_power_on(struct phy *phy) qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) - usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max); + usleep_range(10, 20); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); -- GitLab From 0983529d7513e5417a5010f70582e1040c404551 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Fri, 14 Oct 2022 00:08:41 +0530 Subject: [PATCH 067/694] phy: tegra: p2u: Set ENABLE_L2_EXIT_RATE_CHANGE in calibration Set ENABLE_L2_EXIT_RATE_CHANGE register bit to request UPHY PLL rate change to Gen1 during initialization. This helps in the below surprise link down cases, - Surprise link down happens at Gen3/Gen4 link speed. - Surprise link down happens and external REFCLK is cut off, which causes UPHY PLL rate to deviate to an invalid rate. 
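The new hook only has an effect once a consumer calls it; a minimal, hypothetical caller-side sketch using the generic PHY API (the controller-side change itself is not part of this patch):

    /* e.g. from the PCIe controller's link bring-up path; error handling trimmed */
    ret = phy_calibrate(p2u_phy);
    if (ret)
        dev_err(dev, "failed to calibrate P2U PHY: %d\n", ret);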
Signed-off-by: Vidya Sagar Link: https://lore.kernel.org/r/20221013183854.21087-9-vidyas@nvidia.com Signed-off-by: Vinod Koul --- drivers/phy/tegra/phy-tegra194-p2u.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/phy/tegra/phy-tegra194-p2u.c b/drivers/phy/tegra/phy-tegra194-p2u.c index 1415ca71de382..633e6b7472759 100644 --- a/drivers/phy/tegra/phy-tegra194-p2u.c +++ b/drivers/phy/tegra/phy-tegra194-p2u.c @@ -15,6 +15,7 @@ #include #define P2U_CONTROL_CMN 0x74 +#define P2U_CONTROL_CMN_ENABLE_L2_EXIT_RATE_CHANGE BIT(13) #define P2U_CONTROL_CMN_SKP_SIZE_PROTECTION_EN BIT(20) #define P2U_PERIODIC_EQ_CTRL_GEN3 0xc0 @@ -85,8 +86,21 @@ static int tegra_p2u_power_on(struct phy *x) return 0; } +static int tegra_p2u_calibrate(struct phy *x) +{ + struct tegra_p2u *phy = phy_get_drvdata(x); + u32 val; + + val = p2u_readl(phy, P2U_CONTROL_CMN); + val |= P2U_CONTROL_CMN_ENABLE_L2_EXIT_RATE_CHANGE; + p2u_writel(phy, val, P2U_CONTROL_CMN); + + return 0; +} + static const struct phy_ops ops = { .power_on = tegra_p2u_power_on, + .calibrate = tegra_p2u_calibrate, .owner = THIS_MODULE, }; -- GitLab From 4a9eac5ae2200f1b208dd33738777f89f93dc0fe Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:43 +0200 Subject: [PATCH 068/694] phy: qcom-qmp-pcie: fix sc8180x initialisation The phy_status mask was never set for SC8180X which meant that the driver would not wait for the PHY to be initialised during power-on and would never detect PHY initialisation timeouts. Fixes: f839f14e24f2 ("phy: qcom-qmp: Add sc8180x PCIe support") Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 151baa63e8e87..a7677b61f96e9 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1733,6 +1733,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { -- GitLab From 94b7288eadf6e2c09e6280c65a9d07cca01bf434 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:44 +0200 Subject: [PATCH 069/694] phy: qcom-qmp-pcie: fix ipq8074-gen3 initialisation The phy_status mask was never set for IPQ8074 (gen3) which meant that the driver would not wait for the PHY to be initialised during power-on and would never detect PHY initialisation timeouts. 
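The failure mode follows directly from how the poll is written: simplified from the driver as it stood before the later cleanups, the wait is

    mask = cfg->phy_status;     /* 0 when the field is never set */
    ready = 0;
    ret = readl_poll_timeout(status, val, (val & mask) == ready, 10,
                             PHY_INIT_COMPLETE_TIMEOUT);

so with a zero mask the condition (val & 0) == 0 is true on the very first read and the timeout can never trigger.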
Fixes: 334fad185415 ("phy: qcom-qmp-pcie: add IPQ8074 PCIe Gen3 QMP PHY support") Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index a7677b61f96e9..f1e94b879d318 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1519,6 +1519,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, .pipe_clock_rate = 250000000, }; -- GitLab From 30518b19895789aa9101474af2ee0f62cd882d5e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:45 +0200 Subject: [PATCH 070/694] phy: qcom-qmp-pcie: fix ipq6018 initialisation The phy_status mask was never set for IPQ6018 which meant that the driver would not wait for the PHY to be initialised during power-on and would never detect PHY initialisation timeouts. Fixes: 520264db3bf9 ("phy: qcom-qmp: add QMP V2 PCIe PHY support for ipq60xx") Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index f1e94b879d318..9b866ed19ddc8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1549,6 +1549,7 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { -- GitLab From 2577ba8c39dafe4320e1eb206b732e08bf871c83 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:46 +0200 Subject: [PATCH 071/694] phy: qcom-qmp-pcie: clean up status polling Clean up the status polling by dropping the ready bit mask which is no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 9b866ed19ddc8..4af6b9e50d16f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1966,7 +1966,7 @@ static int qmp_pcie_power_on(struct phy *phy) const struct qmp_phy_cfg_tables *mode_tables; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int mask, val; int ret; qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], @@ -2004,9 +2004,7 @@ static int qmp_pcie_power_on(struct phy *phy) status = pcs + cfg->regs[QPHY_PCS_STATUS]; mask = cfg->phy_status; - ready = 0; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, !(val & mask), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From 5cbeb75a36aa28353e51f5e6926e19449a4f3389 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:47 +0200 Subject: [PATCH 072/694] phy: qcom-qmp-pcie: increase status polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 4af6b9e50d16f..d3e7e673114fb 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2004,7 +2004,7 @@ static int qmp_pcie_power_on(struct phy *phy) status = pcs + cfg->regs[QPHY_PCS_STATUS]; mask = cfg->phy_status; - ret = readl_poll_timeout(status, val, !(val & mask), 10, + ret = readl_poll_timeout(status, val, !(val & mask), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From 1a3ae97c2490b2217810a17ab4f47552e9f6f70f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:48 +0200 Subject: [PATCH 073/694] phy: qcom-qmp-pcie-msm8996: clean up ready and status polling Clean up the PHY ready and status polling by dropping the configuration masks which are no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index 899be7bd4d92d..b9260c8746bf0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -35,7 +35,6 @@ #define PLL_READY_GATE_EN BIT(3) /* QPHY_PCS_STATUS bit */ #define PHYSTATUS BIT(6) -#define PHYSTATUS_4_20 BIT(7) /* QPHY_COM_PCS_READY_STATUS bit */ #define PCS_READY BIT(0) @@ -200,9 +199,6 @@ struct qmp_phy_cfg { unsigned int start_ctrl; unsigned int pwrdn_ctrl; - unsigned int mask_com_pcs_ready; - /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ - unsigned int phy_status; }; /** @@ -318,8 +314,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = { .start_ctrl = PCS_START | PLL_READY_GATE_EN, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, - .mask_com_pcs_ready = PCS_READY, - .phy_status = PHYSTATUS, }; static void qmp_pcie_msm8996_configure_lane(void __iomem *base, @@ -356,7 +350,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; void __iomem *status; - unsigned int mask, val; + unsigned int val; int ret; qmp_pcie_msm8996_configure(serdes, serdes_tbl, serdes_tbl_num); @@ -366,9 +360,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) SERDES_START | PCS_START); status = serdes + cfg->regs[QPHY_COM_PCS_READY_STATUS]; - mask = cfg->mask_com_pcs_ready; - - ret = readl_poll_timeout(status, val, (val & mask), 10, + ret = readl_poll_timeout(status, val, (val & PCS_READY), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, @@ -484,7 +476,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int val; int ret; qmp_pcie_msm8996_serdes_init(qphy); @@ -522,10 +514,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - mask = cfg->phy_status; - ready = 0; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From 3894f6d03c4e96665232fbe2e04589f1228cbb0c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:49 +0200 Subject: [PATCH 074/694] phy: qcom-qmp-pcie-msm8996: increase status polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. 
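The quoted windows are not arbitrary: between reads readl_poll_timeout() sleeps with usleep_range((sleep_us >> 2) + 1, sleep_us) (see include/linux/iopoll.h), so the sleep_us argument maps to the ranges as follows:

    /* sleep_us = 10  -> usleep_range(3, 10):   the old 3--10 us window   */
    /* sleep_us = 200 -> usleep_range(51, 200): the new 51--200 us window */
    usleep_range((sleep_us >> 2) + 1, sleep_us);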
Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index b9260c8746bf0..cd8fafe4c2956 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -360,7 +360,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy) SERDES_START | PCS_START); status = serdes + cfg->regs[QPHY_COM_PCS_READY_STATUS]; - ret = readl_poll_timeout(status, val, (val & PCS_READY), 10, + ret = readl_poll_timeout(status, val, (val & PCS_READY), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, @@ -514,7 +514,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From f7075f4905e79e340b0e5f0f097c8ce896be8bb3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:50 +0200 Subject: [PATCH 075/694] phy: qcom-qmp-combo: clean up status polling Clean up the PHY status polling by dropping the configuration mask which is no longer needed since the QMP driver split. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 82055d3a3536b..e312cad6d9c21 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -853,8 +853,6 @@ struct qmp_phy_cfg { unsigned int start_ctrl; unsigned int pwrdn_ctrl; - /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ - unsigned int phy_status; /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; @@ -1023,7 +1021,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1092,7 +1089,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1127,7 +1123,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1197,7 +1192,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { @@ -1267,7 +1261,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -2017,7 +2010,7 @@ static int qmp_combo_power_on(struct phy *phy) void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, 
ready; + unsigned int val; int ret; qmp_combo_serdes_init(qphy); @@ -2059,10 +2052,7 @@ static int qmp_combo_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - mask = cfg->phy_status; - ready = 0; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From 0c1154d69511a030b8ebb2b873095a7fa851e189 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:51 +0200 Subject: [PATCH 076/694] phy: qcom-qmp-combo: increase status polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index e312cad6d9c21..0071c73ac1c83 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2052,7 +2052,7 @@ static int qmp_combo_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From c8f5c188156b87c115f27d44004428ede2e262f8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:52 +0200 Subject: [PATCH 077/694] phy: qcom-qmp-ufs: drop unused phy-status config Drop the unused phy-status configuration mask which has never been used for UFS PHYs. 
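The UFS variant instead gates on PCS_READY in QPHY_PCS_READY_STATUS, which is why the PHYSTATUS mask was dead weight here; the check (in the simplified form introduced by the following patch) is just:

    status = pcs + cfg->regs[QPHY_PCS_READY_STATUS];
    ret = readl_poll_timeout(status, val, (val & PCS_READY), 10,
                             PHY_INIT_COMPLETE_TIMEOUT);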
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 02931b82132f7..1c7d8fc9b94aa 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -31,8 +31,6 @@ /* QPHY_START_CONTROL bits */ #define SERDES_START BIT(0) #define PCS_START BIT(1) -/* QPHY_PCS_STATUS bit */ -#define PHYSTATUS BIT(6) /* QPHY_PCS_READY_STATUS bit */ #define PCS_READY BIT(0) @@ -548,8 +546,6 @@ struct qmp_phy_cfg { unsigned int start_ctrl; unsigned int pwrdn_ctrl; - /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ - unsigned int phy_status; /* true, if PCS block has no separate SW_RESET register */ bool no_pcs_sw_reset; @@ -668,7 +664,6 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .no_pcs_sw_reset = true, }; @@ -692,7 +687,6 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .no_pcs_sw_reset = true, }; @@ -739,7 +733,6 @@ static const struct qmp_phy_cfg sm8150_ufsphy_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sm8350_ufsphy_cfg = { @@ -761,7 +754,6 @@ static const struct qmp_phy_cfg sm8350_ufsphy_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { @@ -783,7 +775,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { .start_ctrl = SERDES_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static void qmp_ufs_configure_lane(void __iomem *base, -- GitLab From 2f561b687cf47e289b7e068881ad87530c1f1435 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:53 +0200 Subject: [PATCH 078/694] phy: qcom-qmp-ufs: clean up ready polling Clean up the PHY ready polling by dropping the mask variables which are no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 1c7d8fc9b94aa..8380904cf26c2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -908,7 +908,7 @@ static int qmp_ufs_power_on(struct phy *phy) void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int val; int ret; qmp_ufs_serdes_init(qphy); @@ -937,10 +937,7 @@ static int qmp_ufs_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_READY_STATUS]; - mask = PCS_READY; - ready = PCS_READY; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, (val & PCS_READY), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From 7516edbfaf708e5b987f1b9f23aa7336dd4a812d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:54 +0200 Subject: [PATCH 079/694] phy: qcom-qmp-ufs: increase ready polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 8380904cf26c2..1a51f803928b7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -937,7 +937,7 @@ static int qmp_ufs_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_READY_STATUS]; - ret = readl_poll_timeout(status, val, (val & PCS_READY), 10, + ret = readl_poll_timeout(status, val, (val & PCS_READY), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From f5ef85adece529a6cd1e7563081c41038923a9ed Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:55 +0200 Subject: [PATCH 080/694] phy: qcom-qmp-usb: clean up status polling Clean up the PHY status polling by dropping the configuration mask which is no longer needed since the QMP driver split. 
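Several patches in this series also bump the poll period argument from 10 to 200. As a rough, simplified model of what the iopoll helpers do with that sleep_us argument (the real implementation lives in include/linux/iopoll.h; the function name here is hypothetical): a sleep_us of 10 maps to usleep_range(3, 10) per iteration and 200 maps to usleep_range(51, 200), which is where the 3--10 us and 51--200 us figures in the commit messages come from.

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/ktime.h>

/* Simplified sketch of readl_poll_timeout(); not a kernel API. */
static int example_poll(void __iomem *status, u32 mask,
                        unsigned long sleep_us, u64 timeout_us)
{
        ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
        u32 val;

        for (;;) {
                val = readl(status);
                if (val & mask)
                        return 0;
                if (ktime_after(ktime_get(), timeout))
                        return (readl(status) & mask) ? 0 : -ETIMEDOUT;
                /* sleep_us = 10 -> 3..10 us; sleep_us = 200 -> 51..200 us */
                usleep_range((sleep_us >> 2) + 1, sleep_us);
        }
}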
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 751f628710eb5..840b67167581e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1445,8 +1445,6 @@ struct qmp_phy_cfg { unsigned int start_ctrl; unsigned int pwrdn_ctrl; - /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ - unsigned int phy_status; /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; @@ -1604,7 +1602,6 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { @@ -1628,7 +1625,6 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { @@ -1652,7 +1648,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1679,7 +1674,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1706,7 +1700,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { @@ -1730,7 +1723,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1756,7 +1748,6 @@ static const struct qmp_phy_cfg msm8998_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { @@ -1783,7 +1774,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1813,7 +1803,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1842,7 +1831,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1872,7 +1860,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1901,7 +1888,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -1930,7 +1916,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, 
.has_pwrdn_delay = true, }; @@ -1959,7 +1944,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, @@ -1989,7 +1973,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, .has_pwrdn_delay = true, }; @@ -2015,7 +1998,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN, - .phy_status = PHYSTATUS, }; static void qmp_usb_configure_lane(void __iomem *base, @@ -2147,7 +2129,7 @@ static int qmp_usb_power_on(struct phy *phy) void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; void __iomem *status; - unsigned int mask, val, ready; + unsigned int val; int ret; qmp_usb_serdes_init(qphy); @@ -2181,10 +2163,7 @@ static int qmp_usb_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - mask = cfg->phy_status; - ready = 0; - - ret = readl_poll_timeout(status, val, (val & mask) == ready, 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From 7612890b9df8f3f4f9b4fd39d988a5afd97aa3e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:56 +0200 Subject: [PATCH 081/694] phy: qcom-qmp-usb: increase status polling period It typically takes between one and two milliseconds for the PHY to become ready after starting it. Increase the tight 3--10 us polling period to the more reasonable 51--200 us. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221012085002.24099-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 840b67167581e..0bd9291e6a7b5 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2163,7 +2163,7 @@ static int qmp_usb_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); status = pcs + cfg->regs[QPHY_PCS_STATUS]; - ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10, + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, PHY_INIT_COMPLETE_TIMEOUT); if (ret) { dev_err(qmp->dev, "phy initialization timed-out\n"); -- GitLab From d4b81490fe44429203ae6e55df8a556e5b77c88e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:57 +0200 Subject: [PATCH 082/694] phy: qcom-qmp-combo: drop start and pwrdn-ctrl abstraction All USB PHYs need to start and stop the SerDes and PCS so drop the start-ctrl abstraction which is no longer needed since the QMP driver split. Similarly, drop the pwrdn-ctrl abstraction which also is not needed since the split. 
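For reference, the qphy_setbits()/qphy_clrbits() calls used in these start/stop sequences are plain read-modify-write accessors. A paraphrased sketch is below (the example_* names are placeholders for the per-driver static helpers; the trailing read-back mirrors how the QMP drivers make sure the write has been posted before continuing):

#include <linux/io.h>

static inline void example_setbits(void __iomem *base, u32 offset, u32 val)
{
        u32 reg;

        reg = readl(base + offset);
        reg |= val;
        writel(reg, base + offset);

        /* ensure the write above has gone through before returning */
        readl(base + offset);
}

static inline void example_clrbits(void __iomem *base, u32 offset, u32 val)
{
        u32 reg;

        reg = readl(base + offset);
        reg &= ~val;
        writel(reg, base + offset);

        readl(base + offset);
}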
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-15-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 29 +++++------------------ 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 0071c73ac1c83..2fab8d5ec0f1d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -851,9 +851,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - unsigned int start_ctrl; - unsigned int pwrdn_ctrl; - /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; @@ -1019,9 +1016,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1087,9 +1081,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1121,9 +1112,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1189,9 +1177,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { @@ -1259,9 +1244,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1944,8 +1926,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); mutex_unlock(&qmp->phy_mutex); @@ -2049,7 +2030,8 @@ static int qmp_combo_power_on(struct phy *phy) /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); status = pcs + cfg->regs[QPHY_PCS_STATUS]; ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, @@ -2082,11 +2064,12 @@ static int qmp_combo_power_off(struct phy *phy) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + SW_PWRDN); } return 0; -- GitLab From 5806b87dea8fc1b65a542ef93cbe5f6114157a74 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:58 +0200 Subject: [PATCH 083/694] phy: qcom-qmp-pcie: drop 
start-ctrl abstraction All PCIe PHYs need to start and stop the SerDes and PCS so drop the start-ctrl abstraction which is no longer needed since the QMP driver split. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-16-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index d3e7e673114fb..5534a4ad02435 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1355,7 +1355,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - unsigned int start_ctrl; unsigned int pwrdn_ctrl; /* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */ unsigned int phy_status; @@ -1491,7 +1490,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .num_vregs = 0, .regs = pciephy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1517,7 +1515,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .num_vregs = 0, .regs = ipq_pciephy_gen3_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, @@ -1547,7 +1544,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .num_vregs = 0, .regs = ipq_pciephy_gen3_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1575,7 +1571,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sdm845_qmp_pciephy_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1601,7 +1596,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sdm845_qhp_pciephy_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1639,7 +1633,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1677,7 +1670,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1703,7 +1695,6 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = pciephy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, @@ -1733,7 +1724,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1761,7 +1751,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = PCS_START | SERDES_START, .pwrdn_ctrl = SW_PWRDN, .phy_status = PHYSTATUS_4_20, }; @@ -1789,7 +1778,6 @@ 
static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS, }; @@ -1832,7 +1820,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8250_pcie_regs_layout, - .start_ctrl = SERDES_START | PCS_START, .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, .phy_status = PHYSTATUS_4_20, }; @@ -1997,7 +1984,7 @@ static int qmp_pcie_power_on(struct phy *phy) qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); if (!cfg->skip_start_delay) usleep_range(1000, 1200); @@ -2030,7 +2017,8 @@ static int qmp_pcie_power_off(struct phy *phy) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], -- GitLab From 73ad6a9dd51799afd104edc2bf2016a347a717fe Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:49:59 +0200 Subject: [PATCH 084/694] phy: qcom-qmp-pcie: add config sanity checks The driver expects every configuration to set the pwrdn_ctrl and phy_status masks. Add some probe WARN_ON_ONCE() to probe to catch any new driver support that fails to provide them. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-17-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 5534a4ad02435..7c81667dd9680 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2347,6 +2347,9 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (!cfg) return -EINVAL; + WARN_ON_ONCE(!cfg->pwrdn_ctrl); + WARN_ON_ONCE(!cfg->phy_status); + serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); -- GitLab From 3d3db6f024e70255899a32323e20561c8c6f5850 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:50:00 +0200 Subject: [PATCH 085/694] phy: qcom-qmp-pcie-msm8996: drop start and pwrdn-ctrl abstraction Drop the start and pwrdn-ctrl abstractions which are no longer needed since the QMP driver split. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-18-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index cd8fafe4c2956..ff198d846fd2d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -196,9 +196,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - - unsigned int start_ctrl; - unsigned int pwrdn_ctrl; }; /** @@ -311,9 +308,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = pciephy_regs_layout, - - .start_ctrl = PCS_START | PLL_READY_GATE_EN, - .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, }; static void qmp_pcie_msm8996_configure_lane(void __iomem *base, @@ -503,7 +497,8 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) * Pull out PHY from POWER DOWN state. * This is active low enable signal to power-down PHY. */ - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl); + qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, + SW_PWRDN | REFCLK_DRV_DSBL); usleep_range(POWER_DOWN_DELAY_US_MIN, POWER_DOWN_DELAY_US_MAX); @@ -511,7 +506,8 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy) qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], + PCS_START | PLL_READY_GATE_EN); status = pcs + cfg->regs[QPHY_PCS_STATUS]; ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, @@ -542,11 +538,12 @@ static int qmp_pcie_msm8996_power_off(struct phy *phy) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + SW_PWRDN | REFCLK_DRV_DSBL); return 0; } -- GitLab From cb4a982fa94a106c3e5d7d9f596375ae442a71ba Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:50:01 +0200 Subject: [PATCH 086/694] phy: qcom-qmp-ufs: drop start and pwrdn-ctrl abstraction Drop the start and pwrdn-ctrl abstractions which are no longer needed since the QMP driver split. 
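Condensing the effect of this change for the UFS case (a simplified sketch with hypothetical function names; qphy_setbits()/qphy_clrbits(), the register indices and the bit defines are the driver's own, and resets, clocks and table programming are omitted): SW_PWRDN is an active-low power-down control, so setting it brings the PHY out of power-down, and only the SerDes is started since the UFS configurations never set PCS_START.

static void example_ufs_start(void __iomem *pcs, const unsigned int *regs)
{
        /* active low: setting SW_PWRDN takes the PHY out of power-down */
        qphy_setbits(pcs, regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN);
        /* UFS PHYs start only the SerDes, not the PCS */
        qphy_setbits(pcs, regs[QPHY_START_CTRL], SERDES_START);
}

static void example_ufs_stop(void __iomem *pcs, const unsigned int *regs)
{
        qphy_clrbits(pcs, regs[QPHY_START_CTRL], SERDES_START);
        qphy_clrbits(pcs, regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN);
}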
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-19-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 35 +++++-------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 1a51f803928b7..9b58d742af3b7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -544,9 +544,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - unsigned int start_ctrl; - unsigned int pwrdn_ctrl; - /* true, if PCS block has no separate SW_RESET register */ bool no_pcs_sw_reset; }; @@ -662,9 +659,6 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = { .regs = msm8996_ufsphy_regs_layout, - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, - .no_pcs_sw_reset = true, }; @@ -685,9 +679,6 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sdm845_ufsphy_regs_layout, - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, - .no_pcs_sw_reset = true, }; @@ -708,9 +699,6 @@ static const struct qmp_phy_cfg sm6115_ufsphy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm6115_ufsphy_regs_layout, - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, - .no_pcs_sw_reset = true, }; @@ -730,9 +718,6 @@ static const struct qmp_phy_cfg sm8150_ufsphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8150_ufsphy_regs_layout, - - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg sm8350_ufsphy_cfg = { @@ -751,9 +736,6 @@ static const struct qmp_phy_cfg sm8350_ufsphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8150_ufsphy_regs_layout, - - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { @@ -772,9 +754,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = sm8150_ufsphy_regs_layout, - - .start_ctrl = SERDES_START, - .pwrdn_ctrl = SW_PWRDN, }; static void qmp_ufs_configure_lane(void __iomem *base, @@ -832,8 +811,7 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy) if (ret) goto err_disable_regulators; - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -933,8 +911,9 @@ static int qmp_ufs_power_on(struct phy *phy) /* Pull PHY out of reset state */ if (!cfg->no_pcs_sw_reset) qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + + /* start SerDes */ + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); status = pcs + cfg->regs[QPHY_PCS_READY_STATUS]; ret = readl_poll_timeout(status, val, (val & PCS_READY), 200, @@ -956,12 +935,12 @@ static int qmp_ufs_power_off(struct phy *phy) if (!cfg->no_pcs_sw_reset) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + /* stop SerDes */ + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - 
cfg->pwrdn_ctrl); + SW_PWRDN); return 0; } -- GitLab From 47b009db545ae90f0b50149029a6b8137685f524 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Oct 2022 10:50:02 +0200 Subject: [PATCH 087/694] phy: qcom-qmp-usb: drop start and pwrdn-ctrl abstraction Drop the start and pwrdn-ctrl abstractions which are no longer needed since the QMP driver split. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221012085002.24099-20-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 61 ++----------------------- 1 file changed, 5 insertions(+), 56 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 0bd9291e6a7b5..d0c433197080f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1443,9 +1443,6 @@ struct qmp_phy_cfg { /* array of registers with different offsets */ const unsigned int *regs; - unsigned int start_ctrl; - unsigned int pwrdn_ctrl; - /* true, if PHY needs delay after POWER_DOWN */ bool has_pwrdn_delay; @@ -1599,9 +1596,6 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { @@ -1622,9 +1616,6 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { @@ -1646,9 +1637,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1672,9 +1660,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1697,9 +1682,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { @@ -1721,9 +1703,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1745,9 +1724,6 @@ static const struct qmp_phy_cfg msm8998_usb3phy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { @@ -1772,9 +1748,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1801,9 +1774,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = { .regs = 
qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x600, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1829,9 +1799,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1858,9 +1825,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x600, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1886,9 +1850,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x600, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1914,9 +1875,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x1000, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1942,9 +1900,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, .has_phy_dp_com_ctrl = true, }; @@ -1971,9 +1926,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x1000, - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, - .has_pwrdn_delay = true, }; @@ -1995,9 +1947,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qcm2290_usb3phy_regs_layout, - - .start_ctrl = SERDES_START | PCS_START, - .pwrdn_ctrl = SW_PWRDN, }; static void qmp_usb_configure_lane(void __iomem *base, @@ -2092,8 +2041,7 @@ static int qmp_usb_init(struct phy *phy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); } - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -2160,7 +2108,7 @@ static int qmp_usb_power_on(struct phy *phy) qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); status = pcs + cfg->regs[QPHY_PCS_STATUS]; ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, @@ -2189,11 +2137,12 @@ static int qmp_usb_power_off(struct phy *phy) qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl); + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + SW_PWRDN); return 0; } -- GitLab From 922adfd59efd337059f8445a8d8968552b06ed4e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 29 Sep 2022 22:00:17 +0300 Subject: [PATCH 088/694] phy: qcom-qmp-usb: correct registers layout for IPQ8074 USB3 PHY According to the kernel 4.4 sources from NHSS.QSDK.9.0.2 and according to hardware docs, the PHY registers layout used for IPQ8074 USB3 PHY is incorrect. 
This platform uses offset 0x174 for the PCS_STATUS register, 0xd8 for PCS_AUTONOMOUS_MODE_CTRL, etc. Correct the PHY registers layout. Fixes: 94a407cc17a4 ("phy: qcom-qmp: create copies of QMP PHY driver") Fixes: 507156f5a99f ("phy: qcom-qmp: Add USB QMP PHY support for IPQ8074") Signed-off-by: Dmitry Baryshkov Reviewed-by: Kathiravan T Link: https://lore.kernel.org/r/20220929190017.529207-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index d0c433197080f..cd167508f5281 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1595,7 +1595,7 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = usb3phy_regs_layout, + .regs = qmp_v3_usb3phy_regs_layout, }; static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { -- GitLab From d907774ed5aab5a33ef4106ea3830e673196313b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:32 +0200 Subject: [PATCH 089/694] phy: qcom-qmp-ufs: move device-id table Move the device-id table below probe() and next to the driver structure to keep the driver callback functions grouped together. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 78 ++++++++++++------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 9b58d742af3b7..acb8efa1d7584 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1081,45 +1081,6 @@ static int qmp_ufs_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct of_device_id qmp_ufs_of_match_table[] = { - { - .compatible = "qcom,msm8996-qmp-ufs-phy", - .data = &msm8996_ufs_cfg, - }, { - .compatible = "qcom,msm8998-qmp-ufs-phy", - .data = &sdm845_ufsphy_cfg, - }, { - .compatible = "qcom,sc8180x-qmp-ufs-phy", - .data = &sm8150_ufsphy_cfg, - }, { - .compatible = "qcom,sc8280xp-qmp-ufs-phy", - .data = &sm8350_ufsphy_cfg, - }, { - .compatible = "qcom,sdm845-qmp-ufs-phy", - .data = &sdm845_ufsphy_cfg, - }, { - .compatible = "qcom,sm6115-qmp-ufs-phy", - .data = &sm6115_ufsphy_cfg, - }, { - .compatible = "qcom,sm6350-qmp-ufs-phy", - .data = &sdm845_ufsphy_cfg, - }, { - .compatible = "qcom,sm8150-qmp-ufs-phy", - .data = &sm8150_ufsphy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-ufs-phy", - .data = &sm8150_ufsphy_cfg, - }, { - .compatible = "qcom,sm8350-qmp-ufs-phy", - .data = &sm8350_ufsphy_cfg, - }, { - .compatible = "qcom,sm8450-qmp-ufs-phy", - .data = &sm8450_ufsphy_cfg, - }, - { }, -}; -MODULE_DEVICE_TABLE(of, qmp_ufs_of_match_table); - static int qmp_ufs_probe(struct platform_device *pdev) { struct qcom_qmp *qmp; @@ -1185,6 +1146,45 @@ static int qmp_ufs_probe(struct platform_device *pdev) return ret; } +static const struct of_device_id qmp_ufs_of_match_table[] = { + { + .compatible = "qcom,msm8996-qmp-ufs-phy", + .data = &msm8996_ufs_cfg, + }, { + .compatible = "qcom,msm8998-qmp-ufs-phy", + .data = &sdm845_ufsphy_cfg, + }, { + .compatible = "qcom,sc8180x-qmp-ufs-phy", + .data = &sm8150_ufsphy_cfg, + }, { + 
.compatible = "qcom,sc8280xp-qmp-ufs-phy", + .data = &sm8350_ufsphy_cfg, + }, { + .compatible = "qcom,sdm845-qmp-ufs-phy", + .data = &sdm845_ufsphy_cfg, + }, { + .compatible = "qcom,sm6115-qmp-ufs-phy", + .data = &sm6115_ufsphy_cfg, + }, { + .compatible = "qcom,sm6350-qmp-ufs-phy", + .data = &sdm845_ufsphy_cfg, + }, { + .compatible = "qcom,sm8150-qmp-ufs-phy", + .data = &sm8150_ufsphy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-ufs-phy", + .data = &sm8150_ufsphy_cfg, + }, { + .compatible = "qcom,sm8350-qmp-ufs-phy", + .data = &sm8350_ufsphy_cfg, + }, { + .compatible = "qcom,sm8450-qmp-ufs-phy", + .data = &sm8450_ufsphy_cfg, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, qmp_ufs_of_match_table); + static struct platform_driver qmp_ufs_driver = { .probe = qmp_ufs_probe, .driver = { -- GitLab From a36032db30deb2235bc18a8f1088c9a801bf66b0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:33 +0200 Subject: [PATCH 090/694] phy: qcom-qmp-ufs: merge driver data The UFS QMP PHY driver only manages a single PHY so merge the old qcom_qmp and qmp_phy structures and drop the PHY array. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 171 +++++++++--------------- 1 file changed, 63 insertions(+), 108 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index acb8efa1d7584..b4c3b3d97f523 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -548,54 +548,24 @@ struct qmp_phy_cfg { bool no_pcs_sw_reset; }; -/** - * struct qmp_phy - per-lane phy descriptor - * - * @phy: generic phy - * @cfg: phy specific configuration - * @serdes: iomapped memory space for phy's serdes (i.e. 
PLL) - * @tx: iomapped memory space for lane's tx - * @rx: iomapped memory space for lane's rx - * @pcs: iomapped memory space for lane's pcs - * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs) - * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs) - * @pcs_misc: iomapped memory space for lane's pcs_misc - * @qmp: QMP phy to which this lane belongs - */ -struct qmp_phy { - struct phy *phy; +struct qmp_ufs { + struct device *dev; + const struct qmp_phy_cfg *cfg; + void __iomem *serdes; + void __iomem *pcs; + void __iomem *pcs_misc; void __iomem *tx; void __iomem *rx; - void __iomem *pcs; void __iomem *tx2; void __iomem *rx2; - void __iomem *pcs_misc; - struct qcom_qmp *qmp; -}; - -/** - * struct qcom_qmp - structure holding QMP phy block attributes - * - * @dev: device - * - * @clks: array of clocks required by phy - * @resets: array of resets required by phy - * @vregs: regulator supplies bulk data - * - * @phys: array of per-lane phy descriptors - * @ufs_reset: optional UFS PHY reset handle - */ -struct qcom_qmp { - struct device *dev; struct clk_bulk_data *clks; struct regulator_bulk_data *vregs; - - struct qmp_phy **phys; - struct reset_control *ufs_reset; + + struct phy *phy; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -782,10 +752,10 @@ static void qmp_ufs_configure(void __iomem *base, qmp_ufs_configure_lane(base, tbl, num, 0xff); } -static int qmp_ufs_serdes_init(struct qmp_phy *qphy) +static int qmp_ufs_serdes_init(struct qmp_ufs *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->serdes; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->serdes; const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; @@ -794,11 +764,10 @@ static int qmp_ufs_serdes_init(struct qmp_phy *qphy) return 0; } -static int qmp_ufs_com_init(struct qmp_phy *qphy) +static int qmp_ufs_com_init(struct qmp_ufs *qmp) { - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs = qmp->pcs; int ret; ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); @@ -821,10 +790,9 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy) return ret; } -static int qmp_ufs_com_exit(struct qmp_phy *qphy) +static int qmp_ufs_com_exit(struct qmp_ufs *qmp) { - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + const struct qmp_phy_cfg *cfg = qmp->cfg; reset_control_assert(qmp->ufs_reset); @@ -837,9 +805,8 @@ static int qmp_ufs_com_exit(struct qmp_phy *qphy) static int qmp_ufs_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_ufs *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret; dev_vdbg(qmp->dev, "Initializing QMP phy\n"); @@ -870,7 +837,7 @@ static int qmp_ufs_init(struct phy *phy) return ret; } - ret = qmp_ufs_com_init(qphy); + ret = qmp_ufs_com_init(qmp); if (ret) return ret; @@ -879,28 +846,27 @@ static int qmp_ufs_init(struct phy *phy) static int qmp_ufs_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; - void __iomem *pcs = qphy->pcs; + struct qmp_ufs *qmp = phy_get_drvdata(phy); + const struct 
qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; + void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int val; int ret; - qmp_ufs_serdes_init(qphy); + qmp_ufs_serdes_init(qmp); /* Tx, Rx, and PCS configurations */ qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_ufs_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_ufs_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + qmp_ufs_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); @@ -928,18 +894,18 @@ static int qmp_ufs_power_on(struct phy *phy) static int qmp_ufs_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_ufs *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; /* PHY reset */ if (!cfg->no_pcs_sw_reset) - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START); /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -947,9 +913,9 @@ static int qmp_ufs_power_off(struct phy *phy) static int qmp_ufs_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_ufs *qmp = phy_get_drvdata(phy); - qmp_ufs_com_exit(qphy); + qmp_ufs_com_exit(qmp); return 0; } @@ -981,7 +947,7 @@ static int qmp_ufs_disable(struct phy *phy) static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_ufs *qmp = dev_get_drvdata(dev); int num = cfg->num_vregs; int i; @@ -997,7 +963,7 @@ static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) static int qmp_ufs_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_ufs *qmp = dev_get_drvdata(dev); int num = cfg->num_clks; int i; @@ -1017,78 +983,71 @@ static const struct phy_ops qcom_qmp_ufs_ops = { .owner = THIS_MODULE, }; -static int qmp_ufs_create(struct device *dev, struct device_node *np, int id, +static int qmp_ufs_create(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_ufs *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; int ret; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; - - qphy->cfg = cfg; - qphy->serdes = serdes; + qmp->cfg = cfg; + qmp->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. 
*/ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qmp->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->tx)) + return PTR_ERR(qmp->tx); - qphy->rx = devm_of_iomap(dev, np, 1, NULL); - if (IS_ERR(qphy->rx)) - return PTR_ERR(qphy->rx); + qmp->rx = devm_of_iomap(dev, np, 1, NULL); + if (IS_ERR(qmp->rx)) + return PTR_ERR(qmp->rx); - qphy->pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qmp->pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->pcs)) + return PTR_ERR(qmp->pcs); if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qmp->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->tx2)) + return PTR_ERR(qmp->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); + qmp->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qmp->rx2)) + return PTR_ERR(qmp->rx2); - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL); } else { - qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL); } - if (IS_ERR(qphy->pcs_misc)) + if (IS_ERR(qmp->pcs_misc)) dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create qphy %d\n", ret); + dev_err(dev, "failed to create PHY: %d\n", ret); return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } static int qmp_ufs_probe(struct platform_device *pdev) { - struct qcom_qmp *qmp; struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; + struct qmp_ufs *qmp; int num, id; int ret; @@ -1120,14 +1079,10 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (num > 1) return -EINVAL; - qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL); - if (!qmp->phys) - return -ENOMEM; - id = 0; for_each_available_child_of_node(dev->of_node, child) { /* Create per-lane phy */ - ret = qmp_ufs_create(dev, child, id, serdes, cfg); + ret = qmp_ufs_create(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); -- GitLab From cb2c3d2ee46fe56144b74a22504e023aa59835aa Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:34 +0200 Subject: [PATCH 091/694] phy: qcom-qmp-ufs: clean up device-tree parsing Since the QMP driver split there will be at most a single child node so drop the obsolete iteration construct. While at it, drop the verbose error logging that would have been printed also on probe deferrals. Note that there's no need to check if there are additional child nodes (the kernel is not a devicetree validator), but let's return an error if there are no child nodes at all for now. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index b4c3b3d97f523..25744b3576f3e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1048,7 +1048,6 @@ static int qmp_ufs_probe(struct platform_device *pdev) void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; struct qmp_ufs *qmp; - int num, id; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -1074,23 +1073,15 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (ret) return ret; - num = of_get_available_child_count(dev->of_node); - /* do we have a rogue child node ? */ - if (num > 1) + child = of_get_next_available_child(dev->of_node, NULL); + if (!child) return -EINVAL; - id = 0; - for_each_available_child_of_node(dev->of_node, child) { - /* Create per-lane phy */ - ret = qmp_ufs_create(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } + ret = qmp_ufs_create(dev, child, serdes, cfg); + if (ret) + goto err_node_put; - id++; - } + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); -- GitLab From 018dfc99aef2f487f95e07fcbd600e02d290ba18 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:35 +0200 Subject: [PATCH 092/694] phy: qcom-qmp-ufs: clean up probe initialisation Stop abusing the driver data pointer and instead pass the driver state structure directly to the initialisation helpers during probe. 
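The resulting lookup pattern, condensed into a sketch (the example_* names are hypothetical; of_get_next_available_child() and of_node_put() are the real OF helpers): take the first available child node, treat its absence as an error for now, and drop the node reference when done, including on the error path.

#include <linux/device.h>
#include <linux/of.h>

static int example_register_phy(struct device *dev, struct device_node *np)
{
        /* stub standing in for the driver's create/register helper */
        return 0;
}

static int example_parse_child(struct device *dev)
{
        struct device_node *child;
        int ret;

        child = of_get_next_available_child(dev->of_node, NULL);
        if (!child)
                return -EINVAL;

        ret = example_register_phy(dev, child);

        of_node_put(child);     /* drop the reference taken above */

        return ret;
}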
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 37 ++++++++++++------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 25744b3576f3e..057f9a3fdd6d1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -945,9 +945,10 @@ static int qmp_ufs_disable(struct phy *phy) return qmp_ufs_exit(phy); } -static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_ufs_vreg_init(struct qmp_ufs *qmp) { - struct qmp_ufs *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_vregs; int i; @@ -961,9 +962,10 @@ static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) return devm_regulator_bulk_get(dev, num, qmp->vregs); } -static int qmp_ufs_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_ufs_clk_init(struct qmp_ufs *qmp) { - struct qmp_ufs *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_clks; int i; @@ -983,15 +985,13 @@ static const struct phy_ops qcom_qmp_ufs_ops = { .owner = THIS_MODULE, }; -static int qmp_ufs_create(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) { - struct qmp_ufs *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; struct phy *generic_phy; int ret; - qmp->cfg = cfg; - qmp->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. @@ -1045,8 +1045,6 @@ static int qmp_ufs_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; - void __iomem *serdes; - const struct qmp_phy_cfg *cfg = NULL; struct qmp_ufs *qmp; int ret; @@ -1055,21 +1053,20 @@ static int qmp_ufs_probe(struct platform_device *pdev) return -ENOMEM; qmp->dev = dev; - dev_set_drvdata(dev, qmp); - cfg = of_device_get_match_data(dev); - if (!cfg) + qmp->cfg = of_device_get_match_data(dev); + if (!qmp->cfg) return -EINVAL; - serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(serdes)) - return PTR_ERR(serdes); + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); - ret = qmp_ufs_clk_init(dev, cfg); + ret = qmp_ufs_clk_init(qmp); if (ret) return ret; - ret = qmp_ufs_vreg_init(dev, cfg); + ret = qmp_ufs_vreg_init(qmp); if (ret) return ret; @@ -1077,7 +1074,7 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (!child) return -EINVAL; - ret = qmp_ufs_create(dev, child, serdes, cfg); + ret = qmp_ufs_create(qmp, child); if (ret) goto err_node_put; -- GitLab From b98e44e608bcb4a2c235b50cc48144e7043595b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:36 +0200 Subject: [PATCH 093/694] phy: qcom-qmp-ufs: rename PHY ops structure Rename the PHY operation structure so that it has a "phy_ops" suffix and move it next to the implementation. 
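A minimal sketch of the probe shape this moves to (all example_* identifiers are hypothetical; devm_kzalloc() and the platform_device plumbing are the real kernel API): allocate the state structure first and pass it to each initialisation helper explicitly instead of round-tripping it through dev_set_drvdata()/dev_get_drvdata().

#include <linux/platform_device.h>
#include <linux/slab.h>

struct example_priv {
        struct device *dev;
        /* clocks, regulators, register mappings, ... */
};

static int example_clk_init(struct example_priv *priv)
{
        /* stub: would look up clocks for priv->dev here */
        return 0;
}

static int example_vreg_init(struct example_priv *priv)
{
        /* stub: would look up regulators for priv->dev here */
        return 0;
}

static int example_probe(struct platform_device *pdev)
{
        struct device *dev = &pdev->dev;
        struct example_priv *priv;
        int ret;

        priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        priv->dev = dev;

        ret = example_clk_init(priv);
        if (ret)
                return ret;

        return example_vreg_init(priv);
}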
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 057f9a3fdd6d1..90583b87efd1c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -945,6 +945,12 @@ static int qmp_ufs_disable(struct phy *phy) return qmp_ufs_exit(phy); } +static const struct phy_ops qcom_qmp_ufs_phy_ops = { + .power_on = qmp_ufs_enable, + .power_off = qmp_ufs_disable, + .owner = THIS_MODULE, +}; + static int qmp_ufs_vreg_init(struct qmp_ufs *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -979,12 +985,6 @@ static int qmp_ufs_clk_init(struct qmp_ufs *qmp) return devm_clk_bulk_get(dev, num, qmp->clks); } -static const struct phy_ops qcom_qmp_ufs_ops = { - .power_on = qmp_ufs_enable, - .power_off = qmp_ufs_disable, - .owner = THIS_MODULE, -}; - static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -1027,7 +1027,7 @@ static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) if (IS_ERR(qmp->pcs_misc)) dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); - generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_ops); + generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); dev_err(dev, "failed to create PHY: %d\n", ret); -- GitLab From e0a0c761d2203955585b2c93126d6a712726c368 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:37 +0200 Subject: [PATCH 094/694] phy: qcom-qmp-ufs: clean up PHY init Clean up the PHY initialisation somewhat programming both tx and rx for the second lane after the first lane. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 90583b87efd1c..fa703e856eb78 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -859,14 +859,12 @@ static int qmp_ufs_power_on(struct phy *phy) /* Tx, Rx, and PCS configurations */ qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) + if (cfg->lanes >= 2) { + qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_ufs_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + } qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); -- GitLab From 54293c08f2c01efff4a8c1c61290e5f8e34df2df Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:38 +0200 Subject: [PATCH 095/694] dt-bindings: phy: qcom,qmp-ufs: rename current bindings The current QMP UFS PHY bindings are based on the original MSM8996 PCIe PHY binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP UFS PHY block only provide a single PHY and the remnant child node does not really reflect the hardware. 
The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers. In preparation for adding new bindings for SC8280XP which further bindings can be based on, rename the current bindings after MSM8996 and add a reference to the SC8280XP bindings. Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221024090041.19574-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- ...qcom,qmp-ufs-phy.yaml => qcom,msm8996-qmp-ufs-phy.yaml} | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename Documentation/devicetree/bindings/phy/{qcom,qmp-ufs-phy.yaml => qcom,msm8996-qmp-ufs-phy.yaml} (95%) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml similarity index 95% rename from Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml rename to Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml index 815c375d0f7bc..438f9606414ac 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml @@ -1,10 +1,10 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/phy/qcom,qmp-ufs-phy.yaml# +$id: http://devicetree.org/schemas/phy/qcom,msm8996-qmp-ufs-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm QMP PHY controller (UFS) +title: Qualcomm QMP PHY controller (UFS, MSM8996) maintainers: - Vinod Koul @@ -13,6 +13,9 @@ description: QMP PHY controller supports physical layer functionality for a number of controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see + qcom,sc8280xp-qmp-ufs-phy.yaml. + properties: compatible: enum: -- GitLab From 7741f31ae44568f9c32046aaf4c6c41a51359f6d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:39 +0200 Subject: [PATCH 096/694] dt-bindings: phy: qcom,qmp-ufs: fix sc8280xp binding The current QMP UFS PHY bindings are based on the original MSM8996 PCIe PHY binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP UFS PHY block only provides a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers. Add a new binding for the UFS QMP PHYs found on SC8280XP which further bindings can be based on. Note that the current binding is simply removed instead of being deprecated as it was only recently merged and support for SC8280XP is still under development.
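On the driver side, a single-"reg" binding like this is typically handled by ioremapping one region and locating the sub-blocks through per-SoC offsets rather than separate resources. A rough sketch of that pattern, with the struct and offset values made up for illustration (the real offsets come from the driver's configuration tables added later in this series):

    #include <linux/err.h>
    #include <linux/io.h>
    #include <linux/platform_device.h>
    #include <linux/types.h>

    /* Illustrative only: one MMIO region, sub-blocks found via fixed offsets. */
    struct example_ufs_phy_layout {
    	u16 serdes;
    	u16 tx;
    	u16 rx;
    	u16 pcs;
    };

    static const struct example_ufs_phy_layout example_layout = {
    	.serdes	= 0x0,
    	.tx	= 0x400,	/* per-SoC values in practice */
    	.rx	= 0x600,
    	.pcs	= 0xc00,
    };

    static int example_map_phy_blocks(struct platform_device *pdev,
    				  void __iomem **serdes, void __iomem **tx,
    				  void __iomem **rx, void __iomem **pcs)
    {
    	void __iomem *base = devm_platform_ioremap_resource(pdev, 0);

    	if (IS_ERR(base))
    		return PTR_ERR(base);

    	*serdes = base + example_layout.serdes;
    	*tx = base + example_layout.tx;
    	*rx = base + example_layout.rx;
    	*pcs = base + example_layout.pcs;
    	return 0;
    }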
Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221024090041.19574-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qcom,msm8996-qmp-ufs-phy.yaml | 10 +-- .../phy/qcom,sc8280xp-qmp-ufs-phy.yaml | 83 +++++++++++++++++++ 2 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml index 438f9606414ac..be41acbd3b6c3 100644 --- a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml @@ -22,7 +22,6 @@ properties: - qcom,msm8996-qmp-ufs-phy - qcom,msm8998-qmp-ufs-phy - qcom,sc8180x-qmp-ufs-phy - - qcom,sc8280xp-qmp-ufs-phy - qcom,sdm845-qmp-ufs-phy - qcom,sm6115-qmp-ufs-phy - qcom,sm6350-qmp-ufs-phy @@ -122,7 +121,6 @@ allOf: enum: - qcom,msm8998-qmp-ufs-phy - qcom,sc8180x-qmp-ufs-phy - - qcom,sc8280xp-qmp-ufs-phy - qcom,sdm845-qmp-ufs-phy - qcom,sm6115-qmp-ufs-phy - qcom,sm6350-qmp-ufs-phy @@ -159,7 +157,6 @@ allOf: contains: enum: - qcom,msm8998-qmp-ufs-phy - - qcom,sc8280xp-qmp-ufs-phy - qcom,sdm845-qmp-ufs-phy - qcom,sm6350-qmp-ufs-phy - qcom,sm8150-qmp-ufs-phy @@ -214,11 +211,12 @@ allOf: examples: - | - #include + #include #include + phy-wrapper@1d87000 { - compatible = "qcom,sc8280xp-qmp-ufs-phy"; - reg = <0x01d87000 0xe10>; + compatible = "qcom,sm8250-qmp-ufs-phy"; + reg = <0x01d87000 0x1c0>; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x01d87000 0x1000>; diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml new file mode 100644 index 0000000000000..dde86a19f7926 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-ufs-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm QMP PHY controller (UFS, SC8280XP) + +maintainers: + - Vinod Koul + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. 
+ +properties: + compatible: + enum: + - qcom,sc8280xp-qmp-ufs-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 2 + + clock-names: + items: + - const: ref + - const: ref_aux + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + reset-names: + items: + - const: ufsphy + + vdda-phy-supply: true + + vdda-pll-supply: true + + "#phy-cells": + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + - resets + - reset-names + - vdda-phy-supply + - vdda-pll-supply + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + + ufs_mem_phy: phy@1d87000 { + compatible = "qcom,sc8280xp-qmp-ufs-phy"; + reg = <0x01d87000 0x1000>; + + clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>; + clock-names = "ref", "ref_aux"; + + power-domains = <&gcc UFS_PHY_GDSC>; + + resets = <&ufs_mem_hc 0>; + reset-names = "ufsphy"; + + vdda-phy-supply = <&vreg_l6b>; + vdda-pll-supply = <&vreg_l3b>; + + #phy-cells = <0>; + }; -- GitLab From c64d39b403d8dc751ea6a56f97962f93f811fed4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:40 +0200 Subject: [PATCH 097/694] phy: qcom-qmp-ufs: restructure PHY creation In preparation for supporting devicetree bindings which do not use a child node, move the PHY creation to probe() proper and parse the serdes resource in what is now the legacy devicetree helper. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 34 ++++++++++++------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index fa703e856eb78..bf5c1a6b9ca4f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -983,12 +983,15 @@ static int qmp_ufs_clk_init(struct qmp_ufs *qmp) return devm_clk_bulk_get(dev, num, qmp->clks); } -static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) +static int qmp_ufs_parse_dt_legacy(struct qmp_ufs *qmp, struct device_node *np) { + struct platform_device *pdev = to_platform_device(qmp->dev); const struct qmp_phy_cfg *cfg = qmp->cfg; struct device *dev = qmp->dev; - struct phy *generic_phy; - int ret; + + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); /* * Get memory resources for the PHY: @@ -1025,16 +1028,6 @@ static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np) if (IS_ERR(qmp->pcs_misc)) dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); - generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create PHY: %d\n", ret); - return ret; - } - - qmp->phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); - return 0; } @@ -1056,10 +1049,6 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (!qmp->cfg) return -EINVAL; - qmp->serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qmp->serdes)) - return PTR_ERR(qmp->serdes); - ret = qmp_ufs_clk_init(qmp); if (ret) return ret; @@ -1072,10 +1061,19 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (!child) return -EINVAL; - ret = qmp_ufs_create(qmp, child); + ret = qmp_ufs_parse_dt_legacy(qmp, child); if (ret) goto err_node_put; + qmp->phy = devm_phy_create(dev, child, &qcom_qmp_ufs_phy_ops); + if (IS_ERR(qmp->phy)) { + ret = 
PTR_ERR(qmp->phy); + dev_err(dev, "failed to create PHY: %d\n", ret); + goto err_node_put; + } + + phy_set_drvdata(qmp->phy, qmp); + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); -- GitLab From 0e089bb8b31f7651d364723122af7ba7be7b98a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Oct 2022 11:00:41 +0200 Subject: [PATCH 098/694] phy: qcom-qmp-ufs: add support for updated sc8280xp binding Add support for the new SC8280XP binding. Note that the binding does not try to describe every register subregion and instead the driver holds the corresponding offsets. Also note that the driver will continue to accept the old binding, at least for the time being. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221024090041.19574-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 90 ++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index bf5c1a6b9ca4f..189103d1bd180 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -520,10 +520,21 @@ static const struct qmp_phy_init_tbl sm8350_ufsphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_MULTI_LANE_CTRL1, 0x02), }; +struct qmp_ufs_offsets { + u16 serdes; + u16 pcs; + u16 tx; + u16 rx; + u16 tx2; + u16 rx2; +}; + /* struct qmp_phy_cfg - per-PHY initialization config */ struct qmp_phy_cfg { int lanes; + const struct qmp_ufs_offsets *offsets; + /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; int serdes_tbl_num; @@ -611,6 +622,15 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; +static const struct qmp_ufs_offsets qmp_ufs_offsets_v5 = { + .serdes = 0, + .pcs = 0xc00, + .tx = 0x400, + .rx = 0x600, + .tx2 = 0x800, + .rx2 = 0xa00, +}; + static const struct qmp_phy_cfg msm8996_ufs_cfg = { .lanes = 1, @@ -632,6 +652,26 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = { .no_pcs_sw_reset = true, }; +static const struct qmp_phy_cfg sc8280xp_ufsphy_cfg = { + .lanes = 2, + + .offsets = &qmp_ufs_offsets_v5, + + .serdes_tbl = sm8350_ufsphy_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(sm8350_ufsphy_serdes_tbl), + .tx_tbl = sm8350_ufsphy_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(sm8350_ufsphy_tx_tbl), + .rx_tbl = sm8350_ufsphy_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(sm8350_ufsphy_rx_tbl), + .pcs_tbl = sm8350_ufsphy_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(sm8350_ufsphy_pcs_tbl), + .clk_list = sdm845_ufs_phy_clk_l, + .num_clks = ARRAY_SIZE(sdm845_ufs_phy_clk_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = sm8150_ufsphy_regs_layout, +}; + static const struct qmp_phy_cfg sdm845_ufsphy_cfg = { .lanes = 2, @@ -1031,11 +1071,38 @@ static int qmp_ufs_parse_dt_legacy(struct qmp_ufs *qmp, struct device_node *np) return 0; } +static int qmp_ufs_parse_dt(struct qmp_ufs *qmp) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_ufs_offsets *offs = cfg->offsets; + void __iomem *base; + + if (!offs) + return -EINVAL; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + qmp->serdes = base + offs->serdes; + qmp->pcs = base + offs->pcs; + qmp->tx = base + offs->tx; + qmp->rx = base + offs->rx; + + if (cfg->lanes >= 2) { + qmp->tx2 = base + offs->tx2; + 
qmp->rx2 = base + offs->rx2; + } + + return 0; +} + static int qmp_ufs_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *child; struct phy_provider *phy_provider; + struct device_node *np; struct qmp_ufs *qmp; int ret; @@ -1057,15 +1124,18 @@ static int qmp_ufs_probe(struct platform_device *pdev) if (ret) return ret; - child = of_get_next_available_child(dev->of_node, NULL); - if (!child) - return -EINVAL; - - ret = qmp_ufs_parse_dt_legacy(qmp, child); + /* Check for legacy binding with child node. */ + np = of_get_next_available_child(dev->of_node, NULL); + if (np) { + ret = qmp_ufs_parse_dt_legacy(qmp, np); + } else { + np = of_node_get(dev->of_node); + ret = qmp_ufs_parse_dt(qmp); + } if (ret) goto err_node_put; - qmp->phy = devm_phy_create(dev, child, &qcom_qmp_ufs_phy_ops); + qmp->phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops); if (IS_ERR(qmp->phy)) { ret = PTR_ERR(qmp->phy); dev_err(dev, "failed to create PHY: %d\n", ret); @@ -1074,14 +1144,14 @@ static int qmp_ufs_probe(struct platform_device *pdev) phy_set_drvdata(qmp->phy, qmp); - of_node_put(child); + of_node_put(np); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); err_node_put: - of_node_put(child); + of_node_put(np); return ret; } @@ -1097,7 +1167,7 @@ static const struct of_device_id qmp_ufs_of_match_table[] = { .data = &sm8150_ufsphy_cfg, }, { .compatible = "qcom,sc8280xp-qmp-ufs-phy", - .data = &sm8350_ufsphy_cfg, + .data = &sc8280xp_ufsphy_cfg, }, { .compatible = "qcom,sdm845-qmp-ufs-phy", .data = &sdm845_ufsphy_cfg, -- GitLab From 8330b9ebf9ef10156b01d40176b9fff1ce2a374c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Oct 2022 07:24:03 +0200 Subject: [PATCH 099/694] iommu/fsl_pamu: Replace NO_IRQ by 0 NO_IRQ is used to check the return of irq_of_parse_and_map(). On some architecture NO_IRQ is 0, on other architectures it is -1. irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ. So use 0 instead of using NO_IRQ. Signed-off-by: Christophe Leroy Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/2a2570a8d12c80a7d36837b6c586daa708ca09d7.1665033732.git.christophe.leroy@csgroup.eu Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 0d03f837a5d4e..1b53d2da2c191 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -779,7 +779,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) of_get_address(dev->of_node, 0, &size, NULL); irq = irq_of_parse_and_map(dev->of_node, 0); - if (irq == NO_IRQ) { + if (!irq) { dev_warn(dev, "no interrupts listed in PAMU node\n"); goto error; } @@ -903,7 +903,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) return 0; error: - if (irq != NO_IRQ) + if (irq) free_irq(irq, data); kfree_sensitive(data); -- GitLab From 2a48b15972a3b2a2622e6e537e1d53f457670395 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2022 14:43:01 +0100 Subject: [PATCH 100/694] iommu/amd: Remove variable cnt ind iommu_poll_ga_log() Variable cnt is just being incremented and it's never used anywhere else. The variable and the increment are redundant so remove it. 
Signed-off-by: Colin Ian King Reviewed-by: Vasant Hegde Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20221024134301.2158939-1-colin.i.king@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d3b39d0416fa3..3847f3bdc5687 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -767,7 +767,7 @@ EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); static void iommu_poll_ga_log(struct amd_iommu *iommu) { - u32 head, tail, cnt = 0; + u32 head, tail; if (iommu->ga_log == NULL) return; @@ -780,7 +780,6 @@ static void iommu_poll_ga_log(struct amd_iommu *iommu) u64 log_entry; raw = (u64 *)(iommu->ga_log + head); - cnt++; /* Avoid memcpy function-call overhead */ log_entry = *raw; -- GitLab From bf8d2dd2ed0825a58f31cc510245a1eb46f8a87e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:52 +0200 Subject: [PATCH 101/694] iommu/s390: Fix duplicate domain attachments Since commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls") we can end up with duplicates in the list of devices attached to a domain. This is inefficient and confusing since only one domain can actually be in control of the IOMMU translations for a device. Fix this by detaching the device from the previous domain, if any, on attach. Add a WARN_ON() in case we still have attached devices on freeing the domain. While here remove the re-attach on failure dance as it was determined to be unlikely to help and may confuse debug and recovery. Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls") Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-2-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 106 ++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 61 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 3c071782f6f16..c2e5e81d609e1 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -79,10 +79,36 @@ static void s390_domain_free(struct iommu_domain *domain) { struct s390_domain *s390_domain = to_s390_domain(domain); + WARN_ON(!list_empty(&s390_domain->devices)); dma_cleanup_tables(s390_domain->dma_table); kfree(s390_domain); } +static void __s390_iommu_detach_device(struct zpci_dev *zdev) +{ + struct s390_domain *s390_domain = zdev->s390_domain; + struct s390_domain_device *domain_device, *tmp; + unsigned long flags; + + if (!s390_domain) + return; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, + list) { + if (domain_device->zdev == zdev) { + list_del(&domain_device->list); + kfree(domain_device); + break; + } + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + zpci_unregister_ioat(zdev, 0); + zdev->s390_domain = NULL; + zdev->dma_table = NULL; +} + static int s390_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -90,7 +116,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, struct zpci_dev *zdev = to_zpci_dev(dev); struct s390_domain_device *domain_device; unsigned long flags; - int cc, rc; + int cc, rc = 0; if (!zdev) return -ENODEV; @@ -99,24 +125,18 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, if (!domain_device) return -ENOMEM; - if (zdev->dma_table && 
!zdev->s390_domain) { - cc = zpci_dma_exit_device(zdev); - if (cc) { - rc = -EIO; - goto out_free; - } - } - if (zdev->s390_domain) - zpci_unregister_ioat(zdev, 0); + __s390_iommu_detach_device(zdev); + else if (zdev->dma_table) + zpci_dma_exit_device(zdev); - zdev->dma_table = s390_domain->dma_table; cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(s390_domain->dma_table)); if (cc) { rc = -EIO; - goto out_restore; + goto out_free; } + zdev->dma_table = s390_domain->dma_table; spin_lock_irqsave(&s390_domain->list_lock, flags); /* First device defines the DMA range limits */ @@ -127,9 +147,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, /* Allow only devices with identical DMA range limits */ } else if (domain->geometry.aperture_start != zdev->start_dma || domain->geometry.aperture_end != zdev->end_dma) { - rc = -EINVAL; spin_unlock_irqrestore(&s390_domain->list_lock, flags); - goto out_restore; + rc = -EINVAL; + goto out_unregister; } domain_device->zdev = zdev; zdev->s390_domain = s390_domain; @@ -138,14 +158,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; -out_restore: - if (!zdev->s390_domain) { - zpci_dma_init_device(zdev); - } else { - zdev->dma_table = zdev->s390_domain->dma_table; - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); - } +out_unregister: + zpci_unregister_ioat(zdev, 0); + zdev->dma_table = NULL; out_free: kfree(domain_device); @@ -155,32 +170,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, static void s390_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); - struct s390_domain_device *domain_device, *tmp; - unsigned long flags; - int found = 0; - if (!zdev) - return; + WARN_ON(zdev->s390_domain != to_s390_domain(domain)); - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, - list) { - if (domain_device->zdev == zdev) { - list_del(&domain_device->list); - kfree(domain_device); - found = 1; - break; - } - } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); - - if (found && (zdev->s390_domain == s390_domain)) { - zdev->s390_domain = NULL; - zpci_unregister_ioat(zdev, 0); - zpci_dma_init_device(zdev); - } + __s390_iommu_detach_device(zdev); + zpci_dma_init_device(zdev); } static struct iommu_device *s390_iommu_probe_device(struct device *dev) @@ -198,24 +193,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) static void s390_iommu_release_device(struct device *dev) { struct zpci_dev *zdev = to_zpci_dev(dev); - struct iommu_domain *domain; /* - * This is a workaround for a scenario where the IOMMU API common code - * "forgets" to call the detach_dev callback: After binding a device - * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers - * the attach_dev), removing the device via - * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, - * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE - * notifier. - * - * So let's call detach_dev from here if it hasn't been called before. + * release_device is expected to detach any domain currently attached + * to the device, but keep it attached to other devices in the group. 
*/ - if (zdev && zdev->s390_domain) { - domain = iommu_get_domain_for_dev(dev); - if (domain) - s390_iommu_detach_device(domain, dev); - } + if (zdev) + __s390_iommu_detach_device(zdev); } static int s390_iommu_update_trans(struct s390_domain *s390_domain, -- GitLab From 1a3a7d64bbce3179401f4e691522ff992aa1b8a1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:53 +0200 Subject: [PATCH 102/694] iommu/s390: Get rid of s390_domain_device The struct s390_domain_device serves the sole purpose as list entry for the devices list of a struct s390_domain. As it contains no additional information besides a list_head and a pointer to the struct zpci_dev we can simplify things and just thread the device list through struct zpci_dev directly. This removes the need to allocate during domain attach and gets rid of one level of indirection during mapping operations. Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-3-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 1 + drivers/iommu/s390-iommu.c | 37 +++++++------------------------------ 2 files changed, 8 insertions(+), 30 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 108e732d7b140..15f8714ca9b7c 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -117,6 +117,7 @@ struct zpci_bus { struct zpci_dev { struct zpci_bus *zbus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + struct list_head iommu_list; struct kref kref; struct hotplug_slot hotplug_slot; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index c2e5e81d609e1..af83ccde16a49 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -29,11 +29,6 @@ struct s390_domain { spinlock_t list_lock; }; -struct s390_domain_device { - struct list_head list; - struct zpci_dev *zdev; -}; - static struct s390_domain *to_s390_domain(struct iommu_domain *dom) { return container_of(dom, struct s390_domain, domain); @@ -87,21 +82,13 @@ static void s390_domain_free(struct iommu_domain *domain) static void __s390_iommu_detach_device(struct zpci_dev *zdev) { struct s390_domain *s390_domain = zdev->s390_domain; - struct s390_domain_device *domain_device, *tmp; unsigned long flags; if (!s390_domain) return; spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, - list) { - if (domain_device->zdev == zdev) { - list_del(&domain_device->list); - kfree(domain_device); - break; - } - } + list_del_init(&zdev->iommu_list); spin_unlock_irqrestore(&s390_domain->list_lock, flags); zpci_unregister_ioat(zdev, 0); @@ -114,17 +101,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); - struct s390_domain_device *domain_device; unsigned long flags; int cc, rc = 0; if (!zdev) return -ENODEV; - domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL); - if (!domain_device) - return -ENOMEM; - if (zdev->s390_domain) __s390_iommu_detach_device(zdev); else if (zdev->dma_table) @@ -132,10 +114,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(s390_domain->dma_table)); - if (cc) { - rc = -EIO; - goto out_free; - } + if (cc) + return -EIO; zdev->dma_table = 
s390_domain->dma_table; spin_lock_irqsave(&s390_domain->list_lock, flags); @@ -151,9 +131,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, rc = -EINVAL; goto out_unregister; } - domain_device->zdev = zdev; zdev->s390_domain = s390_domain; - list_add(&domain_device->list, &s390_domain->devices); + list_add(&zdev->iommu_list, &s390_domain->devices); spin_unlock_irqrestore(&s390_domain->list_lock, flags); return 0; @@ -161,8 +140,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, out_unregister: zpci_unregister_ioat(zdev, 0); zdev->dma_table = NULL; -out_free: - kfree(domain_device); return rc; } @@ -206,10 +183,10 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, size_t size, int flags) { - struct s390_domain_device *domain_device; phys_addr_t page_addr = pa & PAGE_MASK; dma_addr_t start_dma_addr = dma_addr; unsigned long irq_flags, nr_pages, i; + struct zpci_dev *zdev; unsigned long *entry; int rc = 0; @@ -234,8 +211,8 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, } spin_lock(&s390_domain->list_lock); - list_for_each_entry(domain_device, &s390_domain->devices, list) { - rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32, + list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + rc = zpci_refresh_trans((u64)zdev->fh << 32, start_dma_addr, nr_pages * PAGE_SIZE); if (rc) break; -- GitLab From cbf7827bc5dcfa4301aaea6f57eba9a94dbee7b1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:54 +0200 Subject: [PATCH 103/694] iommu/s390: Fix potential s390_domain aperture shrinking The s390 IOMMU driver currently sets the IOMMU domain's aperture to match the device specific DMA address range of the device that is first attached. This is not ideal. For one if the domain has no device attached in the meantime the aperture could be shrunk allowing translations outside the aperture to exist in the translation tables. Also this is a bit of a misuse of the aperture which really should describe what addresses can be translated and not some device specific limitations. Instead of misusing the aperture like this we can instead create reserved ranges for the ranges inaccessible to the attached devices allowing devices with overlapping ranges to still share an IOMMU domain. This also significantly simplifies s390_iommu_attach_device() allowing us to move the aperture check to the beginning of the function and removing the need to hold the device list's lock to check the aperture. As we then use the same aperture for all domains and it only depends on the table properties we can already check zdev->start_dma/end_dma at probe time and turn the check on attach into a WARN_ON(). 
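The practical effect is visible through the common IOMMU API: the ranges below start_dma and above end_dma are reported as reserved regions instead of being folded into a per-device aperture. A minimal sketch of how such regions can be listed from kernel code (the dump helper itself is made up; iommu_get_resv_regions() and iommu_put_resv_regions() are the existing core helpers):

    #include <linux/device.h>
    #include <linux/iommu.h>
    #include <linux/list.h>

    /* Illustrative only: print the IOVA ranges a device must not use for DMA. */
    static void example_dump_resv_regions(struct device *dev)
    {
    	struct iommu_resv_region *region;
    	LIST_HEAD(resv_regions);

    	iommu_get_resv_regions(dev, &resv_regions);
    	list_for_each_entry(region, &resv_regions, list)
    		dev_info(dev, "reserved: %pa + 0x%zx (type %d)\n",
    			 &region->start, region->length, region->type);
    	iommu_put_resv_regions(dev, &resv_regions);
    }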
Suggested-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-4-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 63 ++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index af83ccde16a49..9b3adc61005c3 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) kfree(s390_domain); return NULL; } + s390_domain->domain.geometry.force_aperture = true; + s390_domain->domain.geometry.aperture_start = 0; + s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1; spin_lock_init(&s390_domain->dma_table_lock); spin_lock_init(&s390_domain->list_lock); @@ -102,11 +105,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); unsigned long flags; - int cc, rc = 0; + int cc; if (!zdev) return -ENODEV; + if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma || + domain->geometry.aperture_end < zdev->start_dma)) + return -EINVAL; + if (zdev->s390_domain) __s390_iommu_detach_device(zdev); else if (zdev->dma_table) @@ -118,30 +125,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return -EIO; zdev->dma_table = s390_domain->dma_table; - spin_lock_irqsave(&s390_domain->list_lock, flags); - /* First device defines the DMA range limits */ - if (list_empty(&s390_domain->devices)) { - domain->geometry.aperture_start = zdev->start_dma; - domain->geometry.aperture_end = zdev->end_dma; - domain->geometry.force_aperture = true; - /* Allow only devices with identical DMA range limits */ - } else if (domain->geometry.aperture_start != zdev->start_dma || - domain->geometry.aperture_end != zdev->end_dma) { - spin_unlock_irqrestore(&s390_domain->list_lock, flags); - rc = -EINVAL; - goto out_unregister; - } + zdev->dma_table = s390_domain->dma_table; zdev->s390_domain = s390_domain; + + spin_lock_irqsave(&s390_domain->list_lock, flags); list_add(&zdev->iommu_list, &s390_domain->devices); spin_unlock_irqrestore(&s390_domain->list_lock, flags); return 0; - -out_unregister: - zpci_unregister_ioat(zdev, 0); - zdev->dma_table = NULL; - - return rc; } static void s390_iommu_detach_device(struct iommu_domain *domain, @@ -155,6 +146,30 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, zpci_dma_init_device(zdev); } +static void s390_iommu_get_resv_regions(struct device *dev, + struct list_head *list) +{ + struct zpci_dev *zdev = to_zpci_dev(dev); + struct iommu_resv_region *region; + + if (zdev->start_dma) { + region = iommu_alloc_resv_region(0, zdev->start_dma, 0, + IOMMU_RESV_RESERVED, GFP_KERNEL); + if (!region) + return; + list_add_tail(®ion->list, list); + } + + if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) { + region = iommu_alloc_resv_region(zdev->end_dma + 1, + ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1, + 0, IOMMU_RESV_RESERVED, GFP_KERNEL); + if (!region) + return; + list_add_tail(®ion->list, list); + } +} + static struct iommu_device *s390_iommu_probe_device(struct device *dev) { struct zpci_dev *zdev; @@ -164,6 +179,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) zdev = to_zpci_dev(dev); + if (zdev->start_dma > zdev->end_dma || + zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1) + return 
ERR_PTR(-EINVAL); + + if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1) + zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1; + return &zdev->iommu_dev; } @@ -342,6 +364,7 @@ static const struct iommu_ops s390_iommu_ops = { .release_device = s390_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, + .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = s390_iommu_attach_device, .detach_dev = s390_iommu_detach_device, -- GitLab From a4d996c2c4b55a42b21d0f7026b2bd6f7396f666 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:55 +0200 Subject: [PATCH 104/694] iommu/s390: Fix incorrect aperture check The domain->geometry.aperture_end specifies the last valid address; treat it as such when checking if a DMA address is valid. Reviewed-by: Pierre Morel Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-5-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 9b3adc61005c3..3e601ca6ee0fc 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -213,7 +213,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, int rc = 0; if (dma_addr < s390_domain->domain.geometry.aperture_start || - dma_addr + size > s390_domain->domain.geometry.aperture_end) + (dma_addr + size - 1) > s390_domain->domain.geometry.aperture_end) return -EINVAL; nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; -- GitLab From b4d8ae0e907b096583491101ddfc5143b7c08918 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:56 +0200 Subject: [PATCH 105/694] iommu/s390: Fix incorrect pgsize_bitmap The .pgsize_bitmap property of struct iommu_ops is not a page mask but rather has a bit set for each size of pages the IOMMU supports. As the comment correctly pointed out, at this moment the code only supports 4K pages, so simply use SZ_4K here. Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-6-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 3e601ca6ee0fc..104dfbec10378 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -12,13 +12,6 @@ #include #include -/* - * Physically contiguous memory regions can be mapped with 4 KiB alignment, - * we allow all page sizes that are an order of 4KiB (no special large page
- */ -#define S390_IOMMU_PGSIZES (~0xFFFUL) - static const struct iommu_ops s390_iommu_ops; struct s390_domain { @@ -363,7 +356,7 @@ static const struct iommu_ops s390_iommu_ops = { .probe_device = s390_iommu_probe_device, .release_device = s390_iommu_release_device, .device_group = generic_device_group, - .pgsize_bitmap = S390_IOMMU_PGSIZES, + .pgsize_bitmap = SZ_4K, .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = s390_iommu_attach_device, -- GitLab From f3cc4f874efa8d5b10ebd9dc8702cd25b9e536a3 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 25 Oct 2022 13:56:57 +0200 Subject: [PATCH 106/694] iommu/s390: Implement map_pages()/unmap_pages() instead of map()/unmap() While s390-iommu currently implements the map_page()/unmap_page() operations which only map/unmap a single page at a time the internal s390_iommu_update_trans() API already supports mapping/unmapping a range of pages at once. Take advantage of this by implementing the map_pages()/unmap_pages() operations instead thus allowing users of the IOMMU drivers to map multiple pages in a single call followed by a single I/O TLB flush if needed. Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221025115657.1666860-7-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 48 +++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 104dfbec10378..7fb512bece9a8 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -196,20 +196,15 @@ static void s390_iommu_release_device(struct device *dev) static int s390_iommu_update_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, - size_t size, int flags) + unsigned long nr_pages, int flags) { phys_addr_t page_addr = pa & PAGE_MASK; dma_addr_t start_dma_addr = dma_addr; - unsigned long irq_flags, nr_pages, i; + unsigned long irq_flags, i; struct zpci_dev *zdev; unsigned long *entry; int rc = 0; - if (dma_addr < s390_domain->domain.geometry.aperture_start || - (dma_addr + size - 1) > s390_domain->domain.geometry.aperture_end) - return -EINVAL; - - nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; if (!nr_pages) return 0; @@ -252,11 +247,24 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, return rc; } -static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +static int s390_iommu_map_pages(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct s390_domain *s390_domain = to_s390_domain(domain); int flags = ZPCI_PTE_VALID, rc = 0; + size_t size = pgcount << __ffs(pgsize); + + if (pgsize != SZ_4K) + return -EINVAL; + + if (iova < s390_domain->domain.geometry.aperture_start || + (iova + size - 1) > s390_domain->domain.geometry.aperture_end) + return -EINVAL; + + if (!IS_ALIGNED(iova | paddr, pgsize)) + return -EINVAL; if (!(prot & IOMMU_READ)) return -EINVAL; @@ -265,7 +273,9 @@ static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova, flags |= ZPCI_TABLE_PROTECTED; rc = s390_iommu_update_trans(s390_domain, paddr, iova, - size, flags); + pgcount, flags); + if (!rc) + *mapped = size; return rc; } @@ -301,21 +311,27 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain 
*domain, return phys; } -static size_t s390_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size, - struct iommu_iotlb_gather *gather) +static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct s390_domain *s390_domain = to_s390_domain(domain); + size_t size = pgcount << __ffs(pgsize); int flags = ZPCI_PTE_INVALID; phys_addr_t paddr; int rc; + if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start || + (iova + size - 1) > s390_domain->domain.geometry.aperture_end)) + return 0; + paddr = s390_iommu_iova_to_phys(domain, iova); if (!paddr) return 0; rc = s390_iommu_update_trans(s390_domain, paddr, iova, - size, flags); + pgcount, flags); if (rc) return 0; @@ -361,8 +377,8 @@ static const struct iommu_ops s390_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = s390_iommu_attach_device, .detach_dev = s390_iommu_detach_device, - .map = s390_iommu_map, - .unmap = s390_iommu_unmap, + .map_pages = s390_iommu_map_pages, + .unmap_pages = s390_iommu_unmap_pages, .iova_to_phys = s390_iommu_iova_to_phys, .free = s390_domain_free, } -- GitLab From 9ad0c1252e84dbc664f0462707182245ed603237 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:11 +0200 Subject: [PATCH 107/694] iommu/sun50i: Fix reset release Reset signal is asserted by writing 0 to the corresponding locations of masters we want to reset. So in order to deassert all reset signals, we should write 1's to all locations. Current code writes 1's to locations of masters which were just reset which is good. However, at the same time it also writes 0's to other locations and thus asserts reset signals of remaining masters. Fix code by writing all 1's when we want to deassert all reset signals. This bug was discovered when working with Cedrus (video decoder). When it faulted, display went blank due to reset signal assertion. Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-2-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index cd9b74ee24def..270204a6ec4ae 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -27,6 +27,7 @@ #include #define IOMMU_RESET_REG 0x010 +#define IOMMU_RESET_RELEASE_ALL 0xffffffff #define IOMMU_ENABLE_REG 0x020 #define IOMMU_ENABLE_ENABLE BIT(0) @@ -893,7 +894,7 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) iommu_write(iommu, IOMMU_INT_CLR_REG, status); iommu_write(iommu, IOMMU_RESET_REG, ~status); - iommu_write(iommu, IOMMU_RESET_REG, status); + iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL); spin_unlock(&iommu->iommu_lock); -- GitLab From cef20703e2b2276aaa402ec5a65ec9a09963b83e Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:12 +0200 Subject: [PATCH 108/694] iommu/sun50i: Consider all fault sources for reset We have to reset masters for all faults - permissions, L1 fault or L2 fault. Currently it's done only for permissions. If other type of fault happens, master is in locked up state. Fix that by really considering all fault sources. 
Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-3-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 270204a6ec4ae..bbc269500800f 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -869,8 +869,8 @@ static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu) static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) { + u32 status, l1_status, l2_status, resets; struct sun50i_iommu *iommu = dev_id; - u32 status; spin_lock(&iommu->iommu_lock); @@ -880,6 +880,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) return IRQ_NONE; } + l1_status = iommu_read(iommu, IOMMU_L1PG_INT_REG); + l2_status = iommu_read(iommu, IOMMU_L2PG_INT_REG); + if (status & IOMMU_INT_INVALID_L2PG) sun50i_iommu_handle_pt_irq(iommu, IOMMU_INT_ERR_ADDR_L2_REG, @@ -893,7 +896,8 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) iommu_write(iommu, IOMMU_INT_CLR_REG, status); - iommu_write(iommu, IOMMU_RESET_REG, ~status); + resets = (status | l1_status | l2_status) & IOMMU_INT_MASTER_MASK; + iommu_write(iommu, IOMMU_RESET_REG, ~resets); iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL); spin_unlock(&iommu->iommu_lock); -- GitLab From eac0104dc69be50bed86926d6f32e82b44f8c921 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:13 +0200 Subject: [PATCH 109/694] iommu/sun50i: Fix R/W permission check Because the driver has enum type permissions and the iommu subsystem has a bitmap type, we have to be careful how the check for combined read and write permissions is done. In such a case, we have to mask both permissions and check that both are set at the same time. The current code just masks both flags but doesn't check that both are set. In short, it always sets the R/W permission, regardless of whether the requested permissions were RO, WO or RW. Fix that. Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-4-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index bbc269500800f..df871af04bcb8 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -271,7 +271,7 @@ static u32 sun50i_mk_pte(phys_addr_t page, int prot) enum sun50i_iommu_aci aci; u32 flags = 0; - if (prot & (IOMMU_READ | IOMMU_WRITE)) + if ((prot & (IOMMU_READ | IOMMU_WRITE)) == (IOMMU_READ | IOMMU_WRITE)) aci = SUN50I_IOMMU_ACI_RD_WR; else if (prot & IOMMU_READ) aci = SUN50I_IOMMU_ACI_RD; -- GitLab From 67a8a67f9eceb72e4c73d1d09ed9ab04f4b8e12d Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:14 +0200 Subject: [PATCH 110/694] iommu/sun50i: Fix flush size Function sun50i_table_flush() takes the number of entries as an argument, not the number of bytes. Fix that mistake in sun50i_dte_get_page_table().
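To see why passing a byte count over-flushes, recall what the helper does internally; the following is a sketch based on the driver, where each table entry (PT_ENTRY_SIZE) is 4 bytes:

    /* Sketch: 'count' is a number of 32-bit table entries, not a byte count. */
    static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain,
    			       u32 *vaddr, unsigned int count)
    {
    	struct sun50i_iommu *iommu = sun50i_domain->iommu;
    	dma_addr_t dma = virt_to_phys(vaddr);
    	size_t size = count * PT_ENTRY_SIZE;

    	dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE);
    }

    /*
     * PT_SIZE = NUM_PT_ENTRIES * PT_ENTRY_SIZE = 256 * 4 = 1024 bytes, so passing
     * PT_SIZE as 'count' synced 1024 * 4 = 4096 bytes, four times the actual page
     * table; NUM_PT_ENTRIES (256) is the correct argument.
     */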
Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-5-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index df871af04bcb8..e62e245060ac7 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -512,7 +512,7 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain, sun50i_iommu_free_page_table(iommu, drop_pt); } - sun50i_table_flush(sun50i_domain, page_table, PT_SIZE); + sun50i_table_flush(sun50i_domain, page_table, NUM_PT_ENTRIES); sun50i_table_flush(sun50i_domain, dte_addr, 1); return page_table; -- GitLab From e563cc0c787c85a4d9def0a77078dc5d3f445e3d Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 25 Oct 2022 18:54:15 +0200 Subject: [PATCH 111/694] iommu/sun50i: Implement .iotlb_sync_map Allocated iova ranges need to be invalidated immediately or otherwise they might or might not work when used by master or CPU. This was discovered when running video decoder conformity test with Cedrus. Some videos were now and then decoded incorrectly and generated page faults. According to vendor driver, it's enough to invalidate just start and end TLB and PTW cache lines. Documentation says that neighbouring lines must be invalidated too. Finally, when page fault occurs, that iova must be invalidated the same way, according to documentation. Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221025165415.307591-6-jernej.skrabec@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 73 ++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index e62e245060ac7..5cb2d44dfb92d 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -93,6 +93,8 @@ #define NUM_PT_ENTRIES 256 #define PT_SIZE (NUM_PT_ENTRIES * PT_ENTRY_SIZE) +#define SPAGE_SIZE 4096 + struct sun50i_iommu { struct iommu_device iommu; @@ -295,6 +297,62 @@ static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain, dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE); } +static void sun50i_iommu_zap_iova(struct sun50i_iommu *iommu, + unsigned long iova) +{ + u32 reg; + int ret; + + iommu_write(iommu, IOMMU_TLB_IVLD_ADDR_REG, iova); + iommu_write(iommu, IOMMU_TLB_IVLD_ADDR_MASK_REG, GENMASK(31, 12)); + iommu_write(iommu, IOMMU_TLB_IVLD_ENABLE_REG, + IOMMU_TLB_IVLD_ENABLE_ENABLE); + + ret = readl_poll_timeout_atomic(iommu->base + IOMMU_TLB_IVLD_ENABLE_REG, + reg, !reg, 1, 2000); + if (ret) + dev_warn(iommu->dev, "TLB invalidation timed out!\n"); +} + +static void sun50i_iommu_zap_ptw_cache(struct sun50i_iommu *iommu, + unsigned long iova) +{ + u32 reg; + int ret; + + iommu_write(iommu, IOMMU_PC_IVLD_ADDR_REG, iova); + iommu_write(iommu, IOMMU_PC_IVLD_ENABLE_REG, + IOMMU_PC_IVLD_ENABLE_ENABLE); + + ret = readl_poll_timeout_atomic(iommu->base + IOMMU_PC_IVLD_ENABLE_REG, + reg, !reg, 1, 2000); + if (ret) + dev_warn(iommu->dev, "PTW cache invalidation timed out!\n"); +} + +static void sun50i_iommu_zap_range(struct sun50i_iommu *iommu, + unsigned long iova, size_t size) +{ + assert_spin_locked(&iommu->iommu_lock); + + iommu_write(iommu, IOMMU_AUTO_GATING_REG, 0); + + sun50i_iommu_zap_iova(iommu, iova); + 
sun50i_iommu_zap_iova(iommu, iova + SPAGE_SIZE); + if (size > SPAGE_SIZE) { + sun50i_iommu_zap_iova(iommu, iova + size); + sun50i_iommu_zap_iova(iommu, iova + size + SPAGE_SIZE); + } + sun50i_iommu_zap_ptw_cache(iommu, iova); + sun50i_iommu_zap_ptw_cache(iommu, iova + SZ_1M); + if (size > SZ_1M) { + sun50i_iommu_zap_ptw_cache(iommu, iova + size); + sun50i_iommu_zap_ptw_cache(iommu, iova + size + SZ_1M); + } + + iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE); +} + static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu) { u32 reg; @@ -344,6 +402,18 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) spin_unlock_irqrestore(&iommu->iommu_lock, flags); } +static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = sun50i_domain->iommu; + unsigned long flags; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + sun50i_iommu_zap_range(iommu, iova, size); + spin_unlock_irqrestore(&iommu->iommu_lock, flags); +} + static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { @@ -767,6 +837,7 @@ static const struct iommu_ops sun50i_iommu_ops = { .attach_dev = sun50i_iommu_attach_device, .detach_dev = sun50i_iommu_detach_device, .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, + .iotlb_sync_map = sun50i_iommu_iotlb_sync_map, .iotlb_sync = sun50i_iommu_iotlb_sync, .iova_to_phys = sun50i_iommu_iova_to_phys, .map = sun50i_iommu_map, @@ -786,6 +857,8 @@ static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu, report_iommu_fault(iommu->domain, iommu->dev, iova, prot); else dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n"); + + sun50i_iommu_zap_range(iommu, iova, SPAGE_SIZE); } static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu, -- GitLab From 7eb99841f340b80be0d0973b0deb592d75fb8928 Mon Sep 17 00:00:00 2001 From: Michael Riesch Date: Wed, 2 Nov 2022 07:35:53 +0100 Subject: [PATCH 112/694] iommu/rockchip: fix permission bits in page table entries v2 As pointed out in the corresponding downstream fix [0], the permission bits of the page table entries are compatible between v1 and v2 of the IOMMU. This is in contrast to the current mainline code that incorrectly assumes that the read and write permission bits are switched. Fix the permission bits by reusing the v1 bit defines. 
[0] https://github.com/rockchip-linux/kernel/commit/e3bc123a2260145e34b57454da3db0edd117eb8e Fixes: c55356c534aa ("iommu: rockchip: Add support for iommu v2") Signed-off-by: Michael Riesch Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221102063553.2464161-1-michael.riesch@wolfvision.net Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index a3fc59b814ab5..a68eadd64f38d 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -280,19 +280,17 @@ static u32 rk_mk_pte(phys_addr_t page, int prot) * 11:9 - Page address bit 34:32 * 8:4 - Page address bit 39:35 * 3 - Security - * 2 - Readable - * 1 - Writable + * 2 - Writable + * 1 - Readable * 0 - 1 if Page @ Page address is valid */ -#define RK_PTE_PAGE_READABLE_V2 BIT(2) -#define RK_PTE_PAGE_WRITABLE_V2 BIT(1) static u32 rk_mk_pte_v2(phys_addr_t page, int prot) { u32 flags = 0; - flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0; - flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0; + flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0; + flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0; return rk_mk_dte_v2(page) | flags; } -- GitLab From 80629af0f3e438b94532cf88f3fb53c711f95138 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 20 Oct 2022 10:21:03 +0200 Subject: [PATCH 113/694] MAINTAINERS: remove section INTEL IOP-ADMA DMA DRIVER Commit cd0ab43ec91a ("dmaengine: remove iop-adma driver") removes the driver's source code, but misses to remove the MAINTAINERS section. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken file pattern. Remove the INTEL IOP-ADMA DMA DRIVER section pointing to the removed driver. Signed-off-by: Lukas Bulwahn Acked-by: Dan Williams Link: https://lore.kernel.org/r/20221020082103.29218-1-lukas.bulwahn@gmail.com Signed-off-by: Vinod Koul --- MAINTAINERS | 5 ----- 1 file changed, 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f185023724..cd1264d24db85 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10332,11 +10332,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ F: include/linux/intel-svm.h -INTEL IOP-ADMA DMA DRIVER -R: Dan Williams -S: Odd fixes -F: drivers/dma/iop-adma.c - INTEL IPU3 CSI-2 CIO2 DRIVER M: Yong Zhi M: Sakari Ailus -- GitLab From cccc46ae362398e43c6b8be38fdb7e39def9e21b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Oct 2022 22:27:47 +0200 Subject: [PATCH 114/694] dmaengine: remove s3c24xx driver The s3c24xx platform was removed and this driver is no longer needed. Signed-off-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221021203329.4143397-14-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 12 - drivers/dma/Makefile | 1 - drivers/dma/s3c24xx-dma.c | 1428 --------------------- include/linux/platform_data/dma-s3c24xx.h | 48 - 4 files changed, 1489 deletions(-) delete mode 100644 drivers/dma/s3c24xx-dma.c delete mode 100644 include/linux/platform_data/dma-s3c24xx.h diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index b73fc89ba8776..ea81d825575fb 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -609,18 +609,6 @@ config SPRD_DMA help Enable support for the on-chip DMA controller on Spreadtrum platform. 
-config S3C24XX_DMAC - bool "Samsung S3C24XX DMA support" - depends on ARCH_S3C24XX || COMPILE_TEST - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - Support for the Samsung S3C24XX DMA controller driver. The - DMA controller is having multiple DMA channels which can be - configured for different peripherals like audio, UART, SPI. - The DMA controller can transfer data from memory to peripheral, - periphal to memory, periphal to periphal and memory to memory. - config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" depends on MACH_TX49XX diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 5b55ada052a7f..a4fd1ce295102 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -70,7 +70,6 @@ obj-$(CONFIG_STM32_DMA) += stm32-dma.o obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o obj-$(CONFIG_SPRD_DMA) += sprd-dma.o -obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o obj-$(CONFIG_TEGRA186_GPC_DMA) += tegra186-gpc-dma.o obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c deleted file mode 100644 index a09eeb545f7d5..0000000000000 --- a/drivers/dma/s3c24xx-dma.c +++ /dev/null @@ -1,1428 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * S3C24XX DMA handling - * - * Copyright (c) 2013 Heiko Stuebner - * - * based on amba-pl08x.c - * - * Copyright (c) 2006 ARM Ltd. - * Copyright (c) 2010 ST-Ericsson SA - * - * Author: Peter Pearse - * Author: Linus Walleij - * - * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals - * that can be routed to any of the 4 to 8 hardware-channels. - * - * Therefore on these DMA controllers the number of channels - * and the number of incoming DMA signals are two totally different things. - * It is usually not possible to theoretically handle all physical signals, - * so a multiplexing scheme with possible denial of use is necessary. 
- * - * Open items: - * - bursts - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "dmaengine.h" -#include "virt-dma.h" - -#define MAX_DMA_CHANNELS 8 - -#define S3C24XX_DISRC 0x00 -#define S3C24XX_DISRCC 0x04 -#define S3C24XX_DISRCC_INC_INCREMENT 0 -#define S3C24XX_DISRCC_INC_FIXED BIT(0) -#define S3C24XX_DISRCC_LOC_AHB 0 -#define S3C24XX_DISRCC_LOC_APB BIT(1) - -#define S3C24XX_DIDST 0x08 -#define S3C24XX_DIDSTC 0x0c -#define S3C24XX_DIDSTC_INC_INCREMENT 0 -#define S3C24XX_DIDSTC_INC_FIXED BIT(0) -#define S3C24XX_DIDSTC_LOC_AHB 0 -#define S3C24XX_DIDSTC_LOC_APB BIT(1) -#define S3C24XX_DIDSTC_INT_TC0 0 -#define S3C24XX_DIDSTC_INT_RELOAD BIT(2) - -#define S3C24XX_DCON 0x10 - -#define S3C24XX_DCON_TC_MASK 0xfffff -#define S3C24XX_DCON_DSZ_BYTE (0 << 20) -#define S3C24XX_DCON_DSZ_HALFWORD (1 << 20) -#define S3C24XX_DCON_DSZ_WORD (2 << 20) -#define S3C24XX_DCON_DSZ_MASK (3 << 20) -#define S3C24XX_DCON_DSZ_SHIFT 20 -#define S3C24XX_DCON_AUTORELOAD 0 -#define S3C24XX_DCON_NORELOAD BIT(22) -#define S3C24XX_DCON_HWTRIG BIT(23) -#define S3C24XX_DCON_HWSRC_SHIFT 24 -#define S3C24XX_DCON_SERV_SINGLE 0 -#define S3C24XX_DCON_SERV_WHOLE BIT(27) -#define S3C24XX_DCON_TSZ_UNIT 0 -#define S3C24XX_DCON_TSZ_BURST4 BIT(28) -#define S3C24XX_DCON_INT BIT(29) -#define S3C24XX_DCON_SYNC_PCLK 0 -#define S3C24XX_DCON_SYNC_HCLK BIT(30) -#define S3C24XX_DCON_DEMAND 0 -#define S3C24XX_DCON_HANDSHAKE BIT(31) - -#define S3C24XX_DSTAT 0x14 -#define S3C24XX_DSTAT_STAT_BUSY BIT(20) -#define S3C24XX_DSTAT_CURRTC_MASK 0xfffff - -#define S3C24XX_DMASKTRIG 0x20 -#define S3C24XX_DMASKTRIG_SWTRIG BIT(0) -#define S3C24XX_DMASKTRIG_ON BIT(1) -#define S3C24XX_DMASKTRIG_STOP BIT(2) - -#define S3C24XX_DMAREQSEL 0x24 -#define S3C24XX_DMAREQSEL_HW BIT(0) - -/* - * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel - * for a DMA source. Instead only specific channels are valid. - * All of these SoCs have 4 physical channels and the number of request - * source bits is 3. Additionally we also need 1 bit to mark the channel - * as valid. - * Therefore we separate the chansel element of the channel data into 4 - * parts of 4 bits each, to hold the information if the channel is valid - * and the hw request source to use. - * - * Example: - * SDI is valid on channels 0, 2 and 3 - with varying hw request sources. 
- * For it the chansel field would look like - * - * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1 - * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2 - * ((BIT(3) | 2) << 0 * 4) // channel 0, with request source 2 - */ -#define S3C24XX_CHANSEL_WIDTH 4 -#define S3C24XX_CHANSEL_VALID BIT(3) -#define S3C24XX_CHANSEL_REQ_MASK 7 - -/* - * struct soc_data - vendor-specific config parameters for individual SoCs - * @stride: spacing between the registers of each channel - * @has_reqsel: does the controller use the newer requestselection mechanism - * @has_clocks: are controllable dma-clocks present - */ -struct soc_data { - int stride; - bool has_reqsel; - bool has_clocks; -}; - -/* - * enum s3c24xx_dma_chan_state - holds the virtual channel states - * @S3C24XX_DMA_CHAN_IDLE: the channel is idle - * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport - * channel and is running a transfer on it - * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport - * channel to become available (only pertains to memcpy channels) - */ -enum s3c24xx_dma_chan_state { - S3C24XX_DMA_CHAN_IDLE, - S3C24XX_DMA_CHAN_RUNNING, - S3C24XX_DMA_CHAN_WAITING, -}; - -/* - * struct s3c24xx_sg - structure containing data per sg - * @src_addr: src address of sg - * @dst_addr: dst address of sg - * @len: transfer len in bytes - * @node: node for txd's dsg_list - */ -struct s3c24xx_sg { - dma_addr_t src_addr; - dma_addr_t dst_addr; - size_t len; - struct list_head node; -}; - -/* - * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor - * @vd: virtual DMA descriptor - * @dsg_list: list of children sg's - * @at: sg currently being transfered - * @width: transfer width - * @disrcc: value for source control register - * @didstc: value for destination control register - * @dcon: base value for dcon register - * @cyclic: indicate cyclic transfer - */ -struct s3c24xx_txd { - struct virt_dma_desc vd; - struct list_head dsg_list; - struct list_head *at; - u8 width; - u32 disrcc; - u32 didstc; - u32 dcon; - bool cyclic; -}; - -struct s3c24xx_dma_chan; - -/* - * struct s3c24xx_dma_phy - holder for the physical channels - * @id: physical index to this channel - * @valid: does the channel have all required elements - * @base: virtual memory base (remapped) for the this channel - * @irq: interrupt for this channel - * @clk: clock for this channel - * @lock: a lock to use when altering an instance of this struct - * @serving: virtual channel currently being served by this physicalchannel - * @host: a pointer to the host (internal use) - */ -struct s3c24xx_dma_phy { - unsigned int id; - bool valid; - void __iomem *base; - int irq; - struct clk *clk; - spinlock_t lock; - struct s3c24xx_dma_chan *serving; - struct s3c24xx_dma_engine *host; -}; - -/* - * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel - * @id: the id of the channel - * @name: name of the channel - * @vc: wrapped virtual channel - * @phy: the physical channel utilized by this channel, if there is one - * @runtime_addr: address for RX/TX according to the runtime config - * @at: active transaction on this channel - * @lock: a lock for this channel data - * @host: a pointer to the host (internal use) - * @state: whether the channel is idle, running etc - * @slave: whether this channel is a device (slave) or for memcpy - */ -struct s3c24xx_dma_chan { - int id; - const char *name; - struct virt_dma_chan vc; - struct s3c24xx_dma_phy *phy; - struct dma_slave_config cfg; - struct 
s3c24xx_txd *at; - struct s3c24xx_dma_engine *host; - enum s3c24xx_dma_chan_state state; - bool slave; -}; - -/* - * struct s3c24xx_dma_engine - the local state holder for the S3C24XX - * @pdev: the corresponding platform device - * @pdata: platform data passed in from the platform/machine - * @base: virtual memory base (remapped) - * @slave: slave engine for this instance - * @memcpy: memcpy engine for this instance - * @phy_chans: array of data for the physical channels - */ -struct s3c24xx_dma_engine { - struct platform_device *pdev; - const struct s3c24xx_dma_platdata *pdata; - struct soc_data *sdata; - void __iomem *base; - struct dma_device slave; - struct dma_device memcpy; - struct s3c24xx_dma_phy *phy_chans; -}; - -/* - * Physical channel handling - */ - -/* - * Check whether a certain channel is busy or not. - */ -static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy) -{ - unsigned int val = readl(phy->base + S3C24XX_DSTAT); - return val & S3C24XX_DSTAT_STAT_BUSY; -} - -static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan, - struct s3c24xx_dma_phy *phy) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; - struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; - int phyvalid; - - /* every phy is valid for memcopy channels */ - if (!s3cchan->slave) - return true; - - /* On newer variants all phys can be used for all virtual channels */ - if (s3cdma->sdata->has_reqsel) - return true; - - phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH)); - return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false; -} - -/* - * Allocate a physical channel for a virtual channel - * - * Try to locate a physical channel to be used for this transfer. If all - * are taken return NULL and the requester will have to cope by using - * some fallback PIO mode or retrying later. - */ -static -struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_dma_phy *phy = NULL; - unsigned long flags; - int i; - int ret; - - for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) { - phy = &s3cdma->phy_chans[i]; - - if (!phy->valid) - continue; - - if (!s3c24xx_dma_phy_valid(s3cchan, phy)) - continue; - - spin_lock_irqsave(&phy->lock, flags); - - if (!phy->serving) { - phy->serving = s3cchan; - spin_unlock_irqrestore(&phy->lock, flags); - break; - } - - spin_unlock_irqrestore(&phy->lock, flags); - } - - /* No physical channel available, cope with it */ - if (i == s3cdma->pdata->num_phy_channels) { - dev_warn(&s3cdma->pdev->dev, "no phy channel available\n"); - return NULL; - } - - /* start the phy clock */ - if (s3cdma->sdata->has_clocks) { - ret = clk_enable(phy->clk); - if (ret) { - dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n", - phy->id, ret); - phy->serving = NULL; - return NULL; - } - } - - return phy; -} - -/* - * Mark the physical channel as free. - * - * This drops the link between the physical and virtual channel. - */ -static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy) -{ - struct s3c24xx_dma_engine *s3cdma = phy->host; - - if (s3cdma->sdata->has_clocks) - clk_disable(phy->clk); - - phy->serving = NULL; -} - -/* - * Stops the channel by writing the stop bit. - * This should not be used for an on-going transfer, but as a method of - * shutting down a channel (eg, when it's no longer used) or terminating a - * transfer. 
- */ -static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy) -{ - writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG); -} - -/* - * Virtual channel handling - */ - -static inline -struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan) -{ - return container_of(chan, struct s3c24xx_dma_chan, vc.chan); -} - -static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_phy *phy = s3cchan->phy; - struct s3c24xx_txd *txd = s3cchan->at; - u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK; - - return tc * txd->width; -} - -static int s3c24xx_dma_set_runtime_config(struct dma_chan *chan, - struct dma_slave_config *config) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - unsigned long flags; - int ret = 0; - - /* Reject definitely invalid configurations */ - if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES || - config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) - return -EINVAL; - - spin_lock_irqsave(&s3cchan->vc.lock, flags); - - if (!s3cchan->slave) { - ret = -EINVAL; - goto out; - } - - s3cchan->cfg = *config; - -out: - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - return ret; -} - -/* - * Transfer handling - */ - -static inline -struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx) -{ - return container_of(tx, struct s3c24xx_txd, vd.tx); -} - -static struct s3c24xx_txd *s3c24xx_dma_get_txd(void) -{ - struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT); - - if (txd) { - INIT_LIST_HEAD(&txd->dsg_list); - txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD; - } - - return txd; -} - -static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd) -{ - struct s3c24xx_sg *dsg, *_dsg; - - list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) { - list_del(&dsg->node); - kfree(dsg); - } - - kfree(txd); -} - -static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan, - struct s3c24xx_txd *txd) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_dma_phy *phy = s3cchan->phy; - const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; - struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node); - u32 dcon = txd->dcon; - u32 val; - - /* transfer-size and -count from len and width */ - switch (txd->width) { - case 1: - dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len; - break; - case 2: - dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2); - break; - case 4: - dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4); - break; - } - - if (s3cchan->slave) { - struct s3c24xx_dma_channel *cdata = - &pdata->channels[s3cchan->id]; - - if (s3cdma->sdata->has_reqsel) { - writel_relaxed((cdata->chansel << 1) | - S3C24XX_DMAREQSEL_HW, - phy->base + S3C24XX_DMAREQSEL); - } else { - int csel = cdata->chansel >> (phy->id * - S3C24XX_CHANSEL_WIDTH); - - csel &= S3C24XX_CHANSEL_REQ_MASK; - dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT; - dcon |= S3C24XX_DCON_HWTRIG; - } - } else { - if (s3cdma->sdata->has_reqsel) - writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL); - } - - writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC); - writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC); - writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST); - writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC); - writel_relaxed(dcon, phy->base + S3C24XX_DCON); - - val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG); - val &= ~S3C24XX_DMASKTRIG_STOP; - val |= S3C24XX_DMASKTRIG_ON; - - /* trigger the dma operation for memcpy transfers */ - if (!s3cchan->slave) - val 
|= S3C24XX_DMASKTRIG_SWTRIG; - - writel(val, phy->base + S3C24XX_DMASKTRIG); -} - -/* - * Set the initial DMA register values and start first sg. - */ -static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_phy *phy = s3cchan->phy; - struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc); - struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx); - - list_del(&txd->vd.node); - - s3cchan->at = txd; - - /* Wait for channel inactive */ - while (s3c24xx_dma_phy_busy(phy)) - cpu_relax(); - - /* point to the first element of the sg list */ - txd->at = txd->dsg_list.next; - s3c24xx_dma_start_next_sg(s3cchan, txd); -} - -/* - * Try to allocate a physical channel. When successful, assign it to - * this virtual channel, and initiate the next descriptor. The - * virtual channel lock must be held at this point. - */ -static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_dma_phy *phy; - - phy = s3c24xx_dma_get_phy(s3cchan); - if (!phy) { - dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n", - s3cchan->name); - s3cchan->state = S3C24XX_DMA_CHAN_WAITING; - return; - } - - dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n", - phy->id, s3cchan->name); - - s3cchan->phy = phy; - s3cchan->state = S3C24XX_DMA_CHAN_RUNNING; - - s3c24xx_dma_start_next_txd(s3cchan); -} - -static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy, - struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - - dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n", - phy->id, s3cchan->name); - - /* - * We do this without taking the lock; we're really only concerned - * about whether this pointer is NULL or not, and we're guaranteed - * that this will only be called when it _already_ is non-NULL. - */ - phy->serving = s3cchan; - s3cchan->phy = phy; - s3cchan->state = S3C24XX_DMA_CHAN_RUNNING; - s3c24xx_dma_start_next_txd(s3cchan); -} - -/* - * Free a physical DMA channel, potentially reallocating it to another - * virtual channel if we have any pending. - */ -static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan) -{ - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_dma_chan *p, *next; - -retry: - next = NULL; - - /* Find a waiting virtual channel for the next transfer. */ - list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node) - if (p->state == S3C24XX_DMA_CHAN_WAITING) { - next = p; - break; - } - - if (!next) { - list_for_each_entry(p, &s3cdma->slave.channels, - vc.chan.device_node) - if (p->state == S3C24XX_DMA_CHAN_WAITING && - s3c24xx_dma_phy_valid(p, s3cchan->phy)) { - next = p; - break; - } - } - - /* Ensure that the physical channel is stopped */ - s3c24xx_dma_terminate_phy(s3cchan->phy); - - if (next) { - bool success; - - /* - * Eww. We know this isn't going to deadlock - * but lockdep probably doesn't. 
- */ - spin_lock(&next->vc.lock); - /* Re-check the state now that we have the lock */ - success = next->state == S3C24XX_DMA_CHAN_WAITING; - if (success) - s3c24xx_dma_phy_reassign_start(s3cchan->phy, next); - spin_unlock(&next->vc.lock); - - /* If the state changed, try to find another channel */ - if (!success) - goto retry; - } else { - /* No more jobs, so free up the physical channel */ - s3c24xx_dma_put_phy(s3cchan->phy); - } - - s3cchan->phy = NULL; - s3cchan->state = S3C24XX_DMA_CHAN_IDLE; -} - -static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd) -{ - struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx); - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan); - - if (!s3cchan->slave) - dma_descriptor_unmap(&vd->tx); - - s3c24xx_dma_free_txd(txd); -} - -static irqreturn_t s3c24xx_dma_irq(int irq, void *data) -{ - struct s3c24xx_dma_phy *phy = data; - struct s3c24xx_dma_chan *s3cchan = phy->serving; - struct s3c24xx_txd *txd; - - dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id); - - /* - * Interrupts happen to notify the completion of a transfer and the - * channel should have moved into its stop state already on its own. - * Therefore interrupts on channels not bound to a virtual channel - * should never happen. Nevertheless send a terminate command to the - * channel if the unlikely case happens. - */ - if (unlikely(!s3cchan)) { - dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n", - phy->id); - - s3c24xx_dma_terminate_phy(phy); - - return IRQ_HANDLED; - } - - spin_lock(&s3cchan->vc.lock); - txd = s3cchan->at; - if (txd) { - /* when more sg's are in this txd, start the next one */ - if (!list_is_last(txd->at, &txd->dsg_list)) { - txd->at = txd->at->next; - if (txd->cyclic) - vchan_cyclic_callback(&txd->vd); - s3c24xx_dma_start_next_sg(s3cchan, txd); - } else if (!txd->cyclic) { - s3cchan->at = NULL; - vchan_cookie_complete(&txd->vd); - - /* - * And start the next descriptor (if any), - * otherwise free this channel. 
- */ - if (vchan_next_desc(&s3cchan->vc)) - s3c24xx_dma_start_next_txd(s3cchan); - else - s3c24xx_dma_phy_free(s3cchan); - } else { - vchan_cyclic_callback(&txd->vd); - - /* Cyclic: reset at beginning */ - txd->at = txd->dsg_list.next; - s3c24xx_dma_start_next_sg(s3cchan, txd); - } - } - spin_unlock(&s3cchan->vc.lock); - - return IRQ_HANDLED; -} - -/* - * The DMA ENGINE API - */ - -static int s3c24xx_dma_terminate_all(struct dma_chan *chan) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - LIST_HEAD(head); - unsigned long flags; - int ret; - - spin_lock_irqsave(&s3cchan->vc.lock, flags); - - if (!s3cchan->phy && !s3cchan->at) { - dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n", - s3cchan->id); - ret = -EINVAL; - goto unlock; - } - - s3cchan->state = S3C24XX_DMA_CHAN_IDLE; - - /* Mark physical channel as free */ - if (s3cchan->phy) - s3c24xx_dma_phy_free(s3cchan); - - /* Dequeue current job */ - if (s3cchan->at) { - vchan_terminate_vdesc(&s3cchan->at->vd); - s3cchan->at = NULL; - } - - /* Dequeue jobs not yet fired as well */ - - vchan_get_all_descriptors(&s3cchan->vc, &head); - - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - - vchan_dma_desc_free_list(&s3cchan->vc, &head); - - return 0; - -unlock: - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - - return ret; -} - -static void s3c24xx_dma_synchronize(struct dma_chan *chan) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - - vchan_synchronize(&s3cchan->vc); -} - -static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan) -{ - /* Ensure all queued descriptors are freed */ - vchan_free_chan_resources(to_virt_chan(chan)); -} - -static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan, - dma_cookie_t cookie, struct dma_tx_state *txstate) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_txd *txd; - struct s3c24xx_sg *dsg; - struct virt_dma_desc *vd; - unsigned long flags; - enum dma_status ret; - size_t bytes = 0; - - spin_lock_irqsave(&s3cchan->vc.lock, flags); - ret = dma_cookie_status(chan, cookie, txstate); - - /* - * There's no point calculating the residue if there's - * no txstate to store the value. - */ - if (ret == DMA_COMPLETE || !txstate) { - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - return ret; - } - - vd = vchan_find_desc(&s3cchan->vc, cookie); - if (vd) { - /* On the issued list, so hasn't been processed yet */ - txd = to_s3c24xx_txd(&vd->tx); - - list_for_each_entry(dsg, &txd->dsg_list, node) - bytes += dsg->len; - } else { - /* - * Currently running, so sum over the pending sg's and - * the currently active one. 
- */ - txd = s3cchan->at; - - dsg = list_entry(txd->at, struct s3c24xx_sg, node); - list_for_each_entry_from(dsg, &txd->dsg_list, node) - bytes += dsg->len; - - bytes += s3c24xx_dma_getbytes_chan(s3cchan); - } - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); - - /* - * This cookie not complete yet - * Get number of bytes left in the active transactions and queue - */ - dma_set_residue(txstate, bytes); - - /* Whether waiting or running, we're in progress */ - return ret; -} - -/* - * Initialize a descriptor to be used by memcpy submit - */ -static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy( - struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, - size_t len, unsigned long flags) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - struct s3c24xx_txd *txd; - struct s3c24xx_sg *dsg; - int src_mod, dest_mod; - - dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %zu bytes from %s\n", - len, s3cchan->name); - - if ((len & S3C24XX_DCON_TC_MASK) != len) { - dev_err(&s3cdma->pdev->dev, "memcpy size %zu to large\n", len); - return NULL; - } - - txd = s3c24xx_dma_get_txd(); - if (!txd) - return NULL; - - dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); - if (!dsg) { - s3c24xx_dma_free_txd(txd); - return NULL; - } - list_add_tail(&dsg->node, &txd->dsg_list); - - dsg->src_addr = src; - dsg->dst_addr = dest; - dsg->len = len; - - /* - * Determine a suitable transfer width. - * The DMA controller cannot fetch/store information which is not - * naturally aligned on the bus, i.e., a 4 byte fetch must start at - * an address divisible by 4 - more generally addr % width must be 0. - */ - src_mod = src % 4; - dest_mod = dest % 4; - switch (len % 4) { - case 0: - txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1; - break; - case 2: - txd->width = ((src_mod == 2 || src_mod == 0) && - (dest_mod == 2 || dest_mod == 0)) ? 
2 : 1; - break; - default: - txd->width = 1; - break; - } - - txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT; - txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT; - txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK | - S3C24XX_DCON_SERV_WHOLE; - - return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); -} - -static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic( - struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, - enum dma_transfer_direction direction, unsigned long flags) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; - struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; - struct s3c24xx_txd *txd; - struct s3c24xx_sg *dsg; - unsigned sg_len; - dma_addr_t slave_addr; - u32 hwcfg = 0; - int i; - - dev_dbg(&s3cdma->pdev->dev, - "prepare cyclic transaction of %zu bytes with period %zu from %s\n", - size, period, s3cchan->name); - - if (!is_slave_direction(direction)) { - dev_err(&s3cdma->pdev->dev, - "direction %d unsupported\n", direction); - return NULL; - } - - txd = s3c24xx_dma_get_txd(); - if (!txd) - return NULL; - - txd->cyclic = 1; - - if (cdata->handshake) - txd->dcon |= S3C24XX_DCON_HANDSHAKE; - - switch (cdata->bus) { - case S3C24XX_DMA_APB: - txd->dcon |= S3C24XX_DCON_SYNC_PCLK; - hwcfg |= S3C24XX_DISRCC_LOC_APB; - break; - case S3C24XX_DMA_AHB: - txd->dcon |= S3C24XX_DCON_SYNC_HCLK; - hwcfg |= S3C24XX_DISRCC_LOC_AHB; - break; - } - - /* - * Always assume our peripheral desintation is a fixed - * address in memory. - */ - hwcfg |= S3C24XX_DISRCC_INC_FIXED; - - /* - * Individual dma operations are requested by the slave, - * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE). 
- */ - txd->dcon |= S3C24XX_DCON_SERV_SINGLE; - - if (direction == DMA_MEM_TO_DEV) { - txd->disrcc = S3C24XX_DISRCC_LOC_AHB | - S3C24XX_DISRCC_INC_INCREMENT; - txd->didstc = hwcfg; - slave_addr = s3cchan->cfg.dst_addr; - txd->width = s3cchan->cfg.dst_addr_width; - } else { - txd->disrcc = hwcfg; - txd->didstc = S3C24XX_DIDSTC_LOC_AHB | - S3C24XX_DIDSTC_INC_INCREMENT; - slave_addr = s3cchan->cfg.src_addr; - txd->width = s3cchan->cfg.src_addr_width; - } - - sg_len = size / period; - - for (i = 0; i < sg_len; i++) { - dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); - if (!dsg) { - s3c24xx_dma_free_txd(txd); - return NULL; - } - list_add_tail(&dsg->node, &txd->dsg_list); - - dsg->len = period; - /* Check last period length */ - if (i == sg_len - 1) - dsg->len = size - period * i; - if (direction == DMA_MEM_TO_DEV) { - dsg->src_addr = addr + period * i; - dsg->dst_addr = slave_addr; - } else { /* DMA_DEV_TO_MEM */ - dsg->src_addr = slave_addr; - dsg->dst_addr = addr + period * i; - } - } - - return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); -} - -static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg( - struct dma_chan *chan, struct scatterlist *sgl, - unsigned int sg_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - struct s3c24xx_dma_engine *s3cdma = s3cchan->host; - const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; - struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; - struct s3c24xx_txd *txd; - struct s3c24xx_sg *dsg; - struct scatterlist *sg; - dma_addr_t slave_addr; - u32 hwcfg = 0; - int tmp; - - dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n", - sg_dma_len(sgl), s3cchan->name); - - txd = s3c24xx_dma_get_txd(); - if (!txd) - return NULL; - - if (cdata->handshake) - txd->dcon |= S3C24XX_DCON_HANDSHAKE; - - switch (cdata->bus) { - case S3C24XX_DMA_APB: - txd->dcon |= S3C24XX_DCON_SYNC_PCLK; - hwcfg |= S3C24XX_DISRCC_LOC_APB; - break; - case S3C24XX_DMA_AHB: - txd->dcon |= S3C24XX_DCON_SYNC_HCLK; - hwcfg |= S3C24XX_DISRCC_LOC_AHB; - break; - } - - /* - * Always assume our peripheral desintation is a fixed - * address in memory. - */ - hwcfg |= S3C24XX_DISRCC_INC_FIXED; - - /* - * Individual dma operations are requested by the slave, - * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE). 
- */ - txd->dcon |= S3C24XX_DCON_SERV_SINGLE; - - if (direction == DMA_MEM_TO_DEV) { - txd->disrcc = S3C24XX_DISRCC_LOC_AHB | - S3C24XX_DISRCC_INC_INCREMENT; - txd->didstc = hwcfg; - slave_addr = s3cchan->cfg.dst_addr; - txd->width = s3cchan->cfg.dst_addr_width; - } else if (direction == DMA_DEV_TO_MEM) { - txd->disrcc = hwcfg; - txd->didstc = S3C24XX_DIDSTC_LOC_AHB | - S3C24XX_DIDSTC_INC_INCREMENT; - slave_addr = s3cchan->cfg.src_addr; - txd->width = s3cchan->cfg.src_addr_width; - } else { - s3c24xx_dma_free_txd(txd); - dev_err(&s3cdma->pdev->dev, - "direction %d unsupported\n", direction); - return NULL; - } - - for_each_sg(sgl, sg, sg_len, tmp) { - dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); - if (!dsg) { - s3c24xx_dma_free_txd(txd); - return NULL; - } - list_add_tail(&dsg->node, &txd->dsg_list); - - dsg->len = sg_dma_len(sg); - if (direction == DMA_MEM_TO_DEV) { - dsg->src_addr = sg_dma_address(sg); - dsg->dst_addr = slave_addr; - } else { /* DMA_DEV_TO_MEM */ - dsg->src_addr = slave_addr; - dsg->dst_addr = sg_dma_address(sg); - } - } - - return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); -} - -/* - * Slave transactions callback to the slave device to allow - * synchronization of slave DMA signals with the DMAC enable - */ -static void s3c24xx_dma_issue_pending(struct dma_chan *chan) -{ - struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); - unsigned long flags; - - spin_lock_irqsave(&s3cchan->vc.lock, flags); - if (vchan_issue_pending(&s3cchan->vc)) { - if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING) - s3c24xx_dma_phy_alloc_and_start(s3cchan); - } - spin_unlock_irqrestore(&s3cchan->vc.lock, flags); -} - -/* - * Bringup and teardown - */ - -/* - * Initialise the DMAC memcpy/slave channels. - * Make a local wrapper to hold required data - */ -static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma, - struct dma_device *dmadev, unsigned int channels, bool slave) -{ - struct s3c24xx_dma_chan *chan; - int i; - - INIT_LIST_HEAD(&dmadev->channels); - - /* - * Register as many memcpy as we have physical channels, - * we won't always be able to use all but the code will have - * to cope with that situation. - */ - for (i = 0; i < channels; i++) { - chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL); - if (!chan) - return -ENOMEM; - - chan->id = i; - chan->host = s3cdma; - chan->state = S3C24XX_DMA_CHAN_IDLE; - - if (slave) { - chan->slave = true; - chan->name = kasprintf(GFP_KERNEL, "slave%d", i); - if (!chan->name) - return -ENOMEM; - } else { - chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); - if (!chan->name) - return -ENOMEM; - } - dev_dbg(dmadev->dev, - "initialize virtual channel \"%s\"\n", - chan->name); - - chan->vc.desc_free = s3c24xx_dma_desc_free; - vchan_init(&chan->vc, dmadev); - } - dev_info(dmadev->dev, "initialized %d virtual %s channels\n", - i, slave ? 
"slave" : "memcpy"); - return i; -} - -static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev) -{ - struct s3c24xx_dma_chan *chan = NULL; - struct s3c24xx_dma_chan *next; - - list_for_each_entry_safe(chan, - next, &dmadev->channels, vc.chan.device_node) { - list_del(&chan->vc.chan.device_node); - tasklet_kill(&chan->vc.task); - } -} - -/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */ -static struct soc_data soc_s3c2410 = { - .stride = 0x40, - .has_reqsel = false, - .has_clocks = false, -}; - -/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */ -static struct soc_data soc_s3c2412 = { - .stride = 0x40, - .has_reqsel = true, - .has_clocks = true, -}; - -/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */ -static struct soc_data soc_s3c2443 = { - .stride = 0x100, - .has_reqsel = true, - .has_clocks = true, -}; - -static const struct platform_device_id s3c24xx_dma_driver_ids[] = { - { - .name = "s3c2410-dma", - .driver_data = (kernel_ulong_t)&soc_s3c2410, - }, { - .name = "s3c2412-dma", - .driver_data = (kernel_ulong_t)&soc_s3c2412, - }, { - .name = "s3c2443-dma", - .driver_data = (kernel_ulong_t)&soc_s3c2443, - }, - { }, -}; - -static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev) -{ - return (struct soc_data *) - platform_get_device_id(pdev)->driver_data; -} - -static int s3c24xx_dma_probe(struct platform_device *pdev) -{ - const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev); - struct s3c24xx_dma_engine *s3cdma; - struct soc_data *sdata; - struct resource *res; - int ret; - int i; - - if (!pdata) { - dev_err(&pdev->dev, "platform data missing\n"); - return -ENODEV; - } - - /* Basic sanity check */ - if (pdata->num_phy_channels > MAX_DMA_CHANNELS) { - dev_err(&pdev->dev, "too many dma channels %d, max %d\n", - pdata->num_phy_channels, MAX_DMA_CHANNELS); - return -EINVAL; - } - - sdata = s3c24xx_dma_get_soc_data(pdev); - if (!sdata) - return -EINVAL; - - s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL); - if (!s3cdma) - return -ENOMEM; - - s3cdma->pdev = pdev; - s3cdma->pdata = pdata; - s3cdma->sdata = sdata; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - s3cdma->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(s3cdma->base)) - return PTR_ERR(s3cdma->base); - - s3cdma->phy_chans = devm_kcalloc(&pdev->dev, - pdata->num_phy_channels, - sizeof(struct s3c24xx_dma_phy), - GFP_KERNEL); - if (!s3cdma->phy_chans) - return -ENOMEM; - - /* acquire irqs and clocks for all physical channels */ - for (i = 0; i < pdata->num_phy_channels; i++) { - struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; - char clk_name[6]; - - phy->id = i; - phy->base = s3cdma->base + (i * sdata->stride); - phy->host = s3cdma; - - phy->irq = platform_get_irq(pdev, i); - if (phy->irq < 0) - continue; - - ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq, - 0, pdev->name, phy); - if (ret) { - dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n", - i, ret); - continue; - } - - if (sdata->has_clocks) { - sprintf(clk_name, "dma.%d", i); - phy->clk = devm_clk_get(&pdev->dev, clk_name); - if (IS_ERR(phy->clk) && sdata->has_clocks) { - dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n", - i, PTR_ERR(phy->clk)); - continue; - } - - ret = clk_prepare(phy->clk); - if (ret) { - dev_err(&pdev->dev, "clock for phy %d failed, error %d\n", - i, ret); - continue; - } - } - - spin_lock_init(&phy->lock); - phy->valid = true; - - 
dev_dbg(&pdev->dev, "physical channel %d is %s\n", - i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE"); - } - - /* Initialize memcpy engine */ - dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask); - dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask); - s3cdma->memcpy.dev = &pdev->dev; - s3cdma->memcpy.device_free_chan_resources = - s3c24xx_dma_free_chan_resources; - s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy; - s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status; - s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending; - s3cdma->memcpy.device_config = s3c24xx_dma_set_runtime_config; - s3cdma->memcpy.device_terminate_all = s3c24xx_dma_terminate_all; - s3cdma->memcpy.device_synchronize = s3c24xx_dma_synchronize; - - /* Initialize slave engine for SoC internal dedicated peripherals */ - dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask); - dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask); - dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask); - s3cdma->slave.dev = &pdev->dev; - s3cdma->slave.device_free_chan_resources = - s3c24xx_dma_free_chan_resources; - s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status; - s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending; - s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg; - s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic; - s3cdma->slave.device_config = s3c24xx_dma_set_runtime_config; - s3cdma->slave.device_terminate_all = s3c24xx_dma_terminate_all; - s3cdma->slave.device_synchronize = s3c24xx_dma_synchronize; - s3cdma->slave.filter.map = pdata->slave_map; - s3cdma->slave.filter.mapcnt = pdata->slavecnt; - s3cdma->slave.filter.fn = s3c24xx_dma_filter; - - /* Register as many memcpy channels as there are physical channels */ - ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy, - pdata->num_phy_channels, false); - if (ret <= 0) { - dev_warn(&pdev->dev, - "%s failed to enumerate memcpy channels - %d\n", - __func__, ret); - goto err_memcpy; - } - - /* Register slave channels */ - ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave, - pdata->num_channels, true); - if (ret <= 0) { - dev_warn(&pdev->dev, - "%s failed to enumerate slave channels - %d\n", - __func__, ret); - goto err_slave; - } - - ret = dma_async_device_register(&s3cdma->memcpy); - if (ret) { - dev_warn(&pdev->dev, - "%s failed to register memcpy as an async device - %d\n", - __func__, ret); - goto err_memcpy_reg; - } - - ret = dma_async_device_register(&s3cdma->slave); - if (ret) { - dev_warn(&pdev->dev, - "%s failed to register slave as an async device - %d\n", - __func__, ret); - goto err_slave_reg; - } - - platform_set_drvdata(pdev, s3cdma); - dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n", - pdata->num_phy_channels); - - return 0; - -err_slave_reg: - dma_async_device_unregister(&s3cdma->memcpy); -err_memcpy_reg: - s3c24xx_dma_free_virtual_channels(&s3cdma->slave); -err_slave: - s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy); -err_memcpy: - if (sdata->has_clocks) - for (i = 0; i < pdata->num_phy_channels; i++) { - struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; - if (phy->valid) - clk_unprepare(phy->clk); - } - - return ret; -} - -static void s3c24xx_dma_free_irq(struct platform_device *pdev, - struct s3c24xx_dma_engine *s3cdma) -{ - int i; - - for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) { - struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; - - devm_free_irq(&pdev->dev, phy->irq, phy); - } -} - -static int s3c24xx_dma_remove(struct platform_device 
*pdev) -{ - const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev); - struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev); - struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev); - int i; - - dma_async_device_unregister(&s3cdma->slave); - dma_async_device_unregister(&s3cdma->memcpy); - - s3c24xx_dma_free_irq(pdev, s3cdma); - - s3c24xx_dma_free_virtual_channels(&s3cdma->slave); - s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy); - - if (sdata->has_clocks) - for (i = 0; i < pdata->num_phy_channels; i++) { - struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; - if (phy->valid) - clk_unprepare(phy->clk); - } - - return 0; -} - -static struct platform_driver s3c24xx_dma_driver = { - .driver = { - .name = "s3c24xx-dma", - }, - .id_table = s3c24xx_dma_driver_ids, - .probe = s3c24xx_dma_probe, - .remove = s3c24xx_dma_remove, -}; - -module_platform_driver(s3c24xx_dma_driver); - -bool s3c24xx_dma_filter(struct dma_chan *chan, void *param) -{ - struct s3c24xx_dma_chan *s3cchan; - - if (chan->device->dev->driver != &s3c24xx_dma_driver.driver) - return false; - - s3cchan = to_s3c24xx_dma_chan(chan); - - return s3cchan->id == (uintptr_t)param; -} -EXPORT_SYMBOL(s3c24xx_dma_filter); - -MODULE_DESCRIPTION("S3C24XX DMA Driver"); -MODULE_AUTHOR("Heiko Stuebner"); -MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h deleted file mode 100644 index 96d02dbeea67a..0000000000000 --- a/include/linux/platform_data/dma-s3c24xx.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * S3C24XX DMA handling - * - * Copyright (c) 2013 Heiko Stuebner - */ - -/* Helper to encode the source selection constraints for early s3c socs. */ -#define S3C24XX_DMA_CHANREQ(src, chan) ((BIT(3) | src) << chan * 4) - -enum s3c24xx_dma_bus { - S3C24XX_DMA_APB, - S3C24XX_DMA_AHB, -}; - -/** - * @bus: on which bus does the peripheral reside - AHB or APB. - * @handshake: is a handshake with the peripheral necessary - * @chansel: channel selection information, depending on variant; reqsel for - * s3c2443 and later and channel-selection map for earlier SoCs - * see CHANSEL doc in s3c2443-dma.c - */ -struct s3c24xx_dma_channel { - enum s3c24xx_dma_bus bus; - bool handshake; - u16 chansel; -}; - -struct dma_slave_map; - -/** - * struct s3c24xx_dma_platdata - platform specific settings - * @num_phy_channels: number of physical channels - * @channels: array of virtual channel descriptions - * @num_channels: number of virtual channels - * @slave_map: dma slave map matching table - * @slavecnt: number of elements in slave_map - */ -struct s3c24xx_dma_platdata { - int num_phy_channels; - struct s3c24xx_dma_channel *channels; - int num_channels; - const struct dma_slave_map *slave_map; - int slavecnt; -}; - -struct dma_chan; -bool s3c24xx_dma_filter(struct dma_chan *chan, void *param); -- GitLab From a92b744f28e86e05514e6bbd889131f6693b6eed Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Tue, 25 Oct 2022 13:12:16 +0530 Subject: [PATCH 115/694] dmaengine: xilinx_dma: fix xilinx_dma_child_probe() return documentation Modify xilinx_dma_child_probe() return documentation to be inline with implementation i.e. can also return failure value on error. 
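As a purely illustrative aside, the kernel-doc Return: convention this change aligns with looks roughly like the sketch below; the function, its argument and its body are hypothetical and are not taken from the xilinx_dma driver.

#include <linux/of.h>
#include <linux/errno.h>

/**
 * example_child_probe - probe a child DMA channel node (illustrative only)
 * @node: device-tree node describing the channel
 *
 * Return: 0 on success, or a negative error value on failure.
 */
static int example_child_probe(struct device_node *node)
{
	if (!node)
		return -EINVAL;

	/* channel setup elided in this sketch */
	return 0;
}
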
Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1666683737-7668-2-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 8cd4e69dc7b4c..3b8cfeccf2da9 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2924,7 +2924,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, * @xdev: Driver specific device structure * @node: Device node * - * Return: 0 always. + * Return: '0' on success and failure value on error. */ static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, struct device_node *node) -- GitLab From 73f11324738a4c23159cf22b08225b6642232982 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Tue, 25 Oct 2022 13:12:17 +0530 Subject: [PATCH 116/694] dmaengine: xilinx_dma : add xilinx_dma_device_config() return documentation document xilinx_dma_device_config() return value. Fixes below kernel-doc warning. xilinx_dma.c:1665: warning: No description found for return value of 'xilinx_dma_device_config' Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1666683737-7668-3-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 3b8cfeccf2da9..a8d23cdf883e5 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -1659,6 +1659,8 @@ static void xilinx_dma_issue_pending(struct dma_chan *dchan) * xilinx_dma_device_config - Configure the DMA channel * @dchan: DMA channel * @config: channel configuration + * + * Return: 0 always. */ static int xilinx_dma_device_config(struct dma_chan *dchan, struct dma_slave_config *config) -- GitLab From 3e98b9bd8469d0b78975be9b36e423b30b0badbe Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 28 Oct 2022 13:53:36 +0200 Subject: [PATCH 117/694] dmaengine: sh: Remove unused shdma-arm.h shdma-arm.h was introduced with commit 1e69653d40f1 ("DMA: shdma: add r8a73a4 DMAC data to the device ID table"), and its sole user was removed with commit a19788612f51 ("dmaengine: sh: Remove R-Mobile APE6 support"). The latter mentions r8a73a4.dtsi but shdma support was removed from that with commit cfda82037780 ("ARM: dts: r8a73a4: Remove non-functional DMA support"), so it seems this is safe to remove. Signed-off-by: Stephen Kitt Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20221028115336.1052782-1-steve@sk2.org Signed-off-by: Vinod Koul --- drivers/dma/sh/shdma-arm.h | 48 -------------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 drivers/dma/sh/shdma-arm.h diff --git a/drivers/dma/sh/shdma-arm.h b/drivers/dma/sh/shdma-arm.h deleted file mode 100644 index 7459f9a13b5be..0000000000000 --- a/drivers/dma/sh/shdma-arm.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Renesas SuperH DMA Engine support - * - * Copyright (C) 2013 Renesas Electronics, Inc. 
- */ - -#ifndef SHDMA_ARM_H -#define SHDMA_ARM_H - -#include "shdma.h" - -/* Transmit sizes and respective CHCR register values */ -enum { - XMIT_SZ_8BIT = 0, - XMIT_SZ_16BIT = 1, - XMIT_SZ_32BIT = 2, - XMIT_SZ_64BIT = 7, - XMIT_SZ_128BIT = 3, - XMIT_SZ_256BIT = 4, - XMIT_SZ_512BIT = 5, -}; - -/* log2(size / 8) - used to calculate number of transfers */ -#define SH_DMAE_TS_SHIFT { \ - [XMIT_SZ_8BIT] = 0, \ - [XMIT_SZ_16BIT] = 1, \ - [XMIT_SZ_32BIT] = 2, \ - [XMIT_SZ_64BIT] = 3, \ - [XMIT_SZ_128BIT] = 4, \ - [XMIT_SZ_256BIT] = 5, \ - [XMIT_SZ_512BIT] = 6, \ -} - -#define TS_LOW_BIT 0x3 /* --xx */ -#define TS_HI_BIT 0xc /* xx-- */ - -#define TS_LOW_SHIFT (3) -#define TS_HI_SHIFT (20 - 2) /* 2 bits for shifted low TS */ - -#define TS_INDEX2VAL(i) \ - ((((i) & TS_LOW_BIT) << TS_LOW_SHIFT) |\ - (((i) & TS_HI_BIT) << TS_HI_SHIFT)) - -#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | RS_ERS | TS_INDEX2VAL((xmit_sz))) -#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | RS_ERS | TS_INDEX2VAL((xmit_sz))) - -#endif -- GitLab From 9a8ddb35a9d5d3ad76784a012459b256a9d7de7e Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 22 Oct 2022 15:49:49 +0800 Subject: [PATCH 118/694] dmaengine: idxd: Make read buffer sysfs attributes invisible for Intel IAA In current code, the following sysfs attributes are exposed to user to show or update the values: max_read_buffers (max_tokens) read_buffer_limit (token_limit) group/read_buffers_allowed (group/tokens_allowed) group/read_buffers_reserved (group/tokens_reserved) group/use_read_buffer_limit (group/use_token_limit) >From Intel IAA spec [1], Intel IAA does not support Read Buffer allocation control. So these sysfs attributes should not be supported on IAA device. Fix this issue by making these sysfs attributes invisible through is_visible() filter when the device is IAA. Add description in the ABI documentation to mention that these attributes are not visible when the device does not support Read Buffer allocation control. [1]: https://cdrdv2.intel.com/v1/dl/getContent/721858 Fixes: fde212e44f45 ("dmaengine: idxd: deprecate token sysfs attributes for read buffers") Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Xiaochen Shen Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20221022074949.11719-1-xiaochen.shen@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 10 ++++++ drivers/dma/idxd/sysfs.c | 36 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 69e2d9155e0d9..3becc9a82bdf6 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -50,6 +50,8 @@ Description: The total number of read buffers supported by this device. The read buffers represent resources within the DSA implementation, and these resources are allocated by engines to support operations. See DSA spec v1.2 9.2.4 Total Read Buffers. + It's not visible when the device does not support Read Buffer + allocation control. What: /sys/bus/dsa/devices/dsa/max_transfer_size Date: Oct 25, 2019 @@ -123,6 +125,8 @@ Contact: dmaengine@vger.kernel.org Description: The maximum number of read buffers that may be in use at one time by operations that access low bandwidth memory in the device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit. + It's not visible when the device does not support Read Buffer + allocation control. 
What: /sys/bus/dsa/devices/dsa/cmd_status Date: Aug 28, 2020 @@ -252,6 +256,8 @@ KernelVersion: 5.17.0 Contact: dmaengine@vger.kernel.org Description: Enable the use of global read buffer limit for the group. See DSA spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit. + It's not visible when the device does not support Read Buffer + allocation control. What: /sys/bus/dsa/devices/group./read_buffers_allowed Date: Dec 10, 2021 @@ -260,6 +266,8 @@ Contact: dmaengine@vger.kernel.org Description: Indicates max number of read buffers that may be in use at one time by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers Allowed. + It's not visible when the device does not support Read Buffer + allocation control. What: /sys/bus/dsa/devices/group./read_buffers_reserved Date: Dec 10, 2021 @@ -268,6 +276,8 @@ Contact: dmaengine@vger.kernel.org Description: Indicates the number of Read Buffers reserved for the use of engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers Reserved. + It's not visible when the device does not support Read Buffer + allocation control. What: /sys/bus/dsa/devices/group./desc_progress_limit Date: Sept 14, 2022 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index f30aad90537bc..7c7ec8323cb7c 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -528,6 +528,22 @@ static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, !idxd->hw.group_cap.progress_limit; } +static bool idxd_group_attr_read_buffers_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* + * Intel IAA does not support Read Buffer allocation control, + * make these attributes invisible. + */ + return (attr == &dev_attr_group_use_token_limit.attr || + attr == &dev_attr_group_use_read_buffer_limit.attr || + attr == &dev_attr_group_tokens_allowed.attr || + attr == &dev_attr_group_read_buffers_allowed.attr || + attr == &dev_attr_group_tokens_reserved.attr || + attr == &dev_attr_group_read_buffers_reserved.attr) && + idxd->data->type == IDXD_TYPE_IAX; +} + static umode_t idxd_group_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -538,6 +554,9 @@ static umode_t idxd_group_attr_visible(struct kobject *kobj, if (idxd_group_attr_progress_limit_invisible(attr, idxd)) return 0; + if (idxd_group_attr_read_buffers_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1552,6 +1571,20 @@ static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, idxd->data->type == IDXD_TYPE_IAX; } +static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + /* + * Intel IAA does not support Read Buffer allocation control, + * make these attributes invisible. 
+ */ + return (attr == &dev_attr_max_tokens.attr || + attr == &dev_attr_max_read_buffers.attr || + attr == &dev_attr_token_limit.attr || + attr == &dev_attr_read_buffer_limit.attr) && + idxd->data->type == IDXD_TYPE_IAX; +} + static umode_t idxd_device_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1561,6 +1594,9 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj, if (idxd_device_attr_max_batch_size_invisible(attr, idxd)) return 0; + if (idxd_device_attr_read_buffers_invisible(attr, idxd)) + return 0; + return attr->mode; } -- GitLab From 17a36713babe882928b869b427729c85deeb1267 Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Sun, 30 Oct 2022 11:42:53 +0200 Subject: [PATCH 119/694] dt-bindings: dmaengine: qcom: gpi: add compatible for SM6115 Document the compatible for GPI DMA controller on SM6115 SoC. Signed-off-by: Adam Skladowski Signed-off-by: Iskren Chernev Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221030094258.486428-4-iskren.chernev@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 0c28944988456..232895fa1d8d2 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -25,6 +25,7 @@ properties: - items: - enum: - qcom,sc7280-gpi-dma + - qcom,sm6115-gpi-dma - qcom,sm8350-gpi-dma - qcom,sm8450-gpi-dma - const: qcom,sm6350-gpi-dma -- GitLab From 739153a6ae6891ff42ed9dbbd8e72dd99e6c8ba5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 2 Nov 2022 10:27:01 +0800 Subject: [PATCH 120/694] Documentation: devres: add missing devm_acpi_dma_controller_free() helper Add missing devm_acpi_dma_controller_free() to devres.rst, it's introduced by commit 1b2e98bc1e35 ("dma: acpi-dma: introduce ACPI DMA helpers"). Fixes: 1b2e98bc1e35 ("dma: acpi-dma: introduce ACPI DMA helpers") Cc: Andy Shevchenko Cc: Mika Westerberg Cc: Rafael J. Wysocki Cc: Vinod Koul Cc: Jonathan Corbet Signed-off-by: Yang Yingliang Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221102022701.1407289-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- Documentation/driver-api/driver-model/devres.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 687adb58048ec..99a989ebee87e 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -437,6 +437,7 @@ SERDEV SLAVE DMA ENGINE devm_acpi_dma_controller_register() + devm_acpi_dma_controller_free() SPI devm_spi_alloc_master() -- GitLab From 4c6b3af3906d0c59497d3bfb07760f3a082b4150 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:23 +0200 Subject: [PATCH 121/694] phy: qcom-qmp-usb: fix sc8280xp PCS_USB offset The PCS_USB register block lives at an offset of 0x1000 from the PCS region on SC8280XP so add the missing offset to avoid corrupting unrelated registers on runtime suspend. Note that the current binding is broken as it does not describe the PCS_USB region and the PCS register size does not cover PCS_USB and the regions in between. As Linux currently maps full pages, simply adding the offset to driver works until the binding has been fixed. 
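For illustration only, the fix amounts to deriving the PCS_USB base from the already-mapped PCS region; the sketch below is a minimal, hypothetical rendering of that idea (the struct and function names are made up and do not match the driver's actual code).

#include <linux/io.h>

/*
 * Minimal sketch, assuming the PCS region has already been ioremap()ed and
 * that the per-SoC config carries an optional pcs_usb_offset (0x1000 on
 * SC8280XP). Without the offset, pcs_usb would alias the start of the PCS
 * block, so PCS_USB writes at runtime suspend would land on unrelated PCS
 * registers.
 */
struct example_usb_phy_cfg {
	unsigned int pcs_usb_offset;
};

struct example_usb_phy {
	const struct example_usb_phy_cfg *cfg;
	void __iomem *pcs;
	void __iomem *pcs_usb;
};

static void example_phy_map_pcs_usb(struct example_usb_phy *phy)
{
	/* PCS_USB lives inside the same mapped page(s) as PCS */
	phy->pcs_usb = phy->pcs + phy->cfg->pcs_usb_offset;
}
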
Fixes: c0c7769cdae2 ("phy: qcom-qmp: Add SC8280XP USB3 UNI phy") Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index cd167508f5281..2e1f5dd7e76db 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1682,6 +1682,7 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, + .pcs_usb_offset = 0x1000, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { -- GitLab From 9fe6b4e87c225cfd8aad1be0e73be86611c35b01 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:24 +0200 Subject: [PATCH 122/694] phy: qcom-qmp-usb: sort device-id table Sort the device-id table by compatible string to make it easier to find and add new entries. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 2e1f5dd7e76db..52b4bee867db3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2503,14 +2503,20 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, static const struct of_device_id qmp_usb_of_match_table[] = { { + .compatible = "qcom,ipq6018-qmp-usb3-phy", + .data = &ipq8074_usb3phy_cfg, + }, { .compatible = "qcom,ipq8074-qmp-usb3-phy", .data = &ipq8074_usb3phy_cfg, }, { .compatible = "qcom,msm8996-qmp-usb3-phy", .data = &msm8996_usb3phy_cfg, }, { - .compatible = "qcom,ipq6018-qmp-usb3-phy", - .data = &ipq8074_usb3phy_cfg, + .compatible = "qcom,msm8998-qmp-usb3-phy", + .data = &msm8998_usb3phy_cfg, + }, { + .compatible = "qcom,qcm2290-qmp-usb3-phy", + .data = &qcm2290_usb3phy_cfg, }, { .compatible = "qcom,sc7180-qmp-usb3-phy", .data = &sc7180_usb3phy_cfg, @@ -2527,8 +2533,11 @@ static const struct of_device_id qmp_usb_of_match_table[] = { .compatible = "qcom,sdm845-qmp-usb3-uni-phy", .data = &qmp_v3_usb3_uniphy_cfg, }, { - .compatible = "qcom,msm8998-qmp-usb3-phy", - .data = &msm8998_usb3phy_cfg, + .compatible = "qcom,sdx55-qmp-usb3-uni-phy", + .data = &sdx55_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdx65-qmp-usb3-uni-phy", + .data = &sdx65_usb3_uniphy_cfg, }, { .compatible = "qcom,sm8150-qmp-usb3-phy", .data = &sm8150_usb3phy_cfg, @@ -2541,12 +2550,6 @@ static const struct of_device_id qmp_usb_of_match_table[] = { }, { .compatible = "qcom,sm8250-qmp-usb3-uni-phy", .data = &sm8250_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdx55-qmp-usb3-uni-phy", - .data = &sdx55_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdx65-qmp-usb3-uni-phy", - .data = &sdx65_usb3_uniphy_cfg, }, { .compatible = "qcom,sm8350-qmp-usb3-phy", .data = &sm8350_usb3phy_cfg, @@ -2556,9 +2559,6 @@ static const struct of_device_id qmp_usb_of_match_table[] = { }, { .compatible = "qcom,sm8450-qmp-usb3-phy", .data = &sm8350_usb3phy_cfg, - }, { - .compatible = "qcom,qcm2290-qmp-usb3-phy", - .data = &qcm2290_usb3phy_cfg, }, { }, }; -- GitLab From 
95dd63b8988cd914c8b5e805e5599cda2cad161e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:25 +0200 Subject: [PATCH 123/694] phy: qcom-qmp-usb: move device-id table Move the device-id table below probe() and next to the driver structure to keep the driver callback functions grouped together. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 126 ++++++++++++------------ 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 52b4bee867db3..ead136c7bd01e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2501,69 +2501,6 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct of_device_id qmp_usb_of_match_table[] = { - { - .compatible = "qcom,ipq6018-qmp-usb3-phy", - .data = &ipq8074_usb3phy_cfg, - }, { - .compatible = "qcom,ipq8074-qmp-usb3-phy", - .data = &ipq8074_usb3phy_cfg, - }, { - .compatible = "qcom,msm8996-qmp-usb3-phy", - .data = &msm8996_usb3phy_cfg, - }, { - .compatible = "qcom,msm8998-qmp-usb3-phy", - .data = &msm8998_usb3phy_cfg, - }, { - .compatible = "qcom,qcm2290-qmp-usb3-phy", - .data = &qcm2290_usb3phy_cfg, - }, { - .compatible = "qcom,sc7180-qmp-usb3-phy", - .data = &sc7180_usb3phy_cfg, - }, { - .compatible = "qcom,sc8180x-qmp-usb3-phy", - .data = &sm8150_usb3phy_cfg, - }, { - .compatible = "qcom,sc8280xp-qmp-usb3-uni-phy", - .data = &sc8280xp_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdm845-qmp-usb3-phy", - .data = &qmp_v3_usb3phy_cfg, - }, { - .compatible = "qcom,sdm845-qmp-usb3-uni-phy", - .data = &qmp_v3_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdx55-qmp-usb3-uni-phy", - .data = &sdx55_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sdx65-qmp-usb3-uni-phy", - .data = &sdx65_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sm8150-qmp-usb3-phy", - .data = &sm8150_usb3phy_cfg, - }, { - .compatible = "qcom,sm8150-qmp-usb3-uni-phy", - .data = &sm8150_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-usb3-phy", - .data = &sm8250_usb3phy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-usb3-uni-phy", - .data = &sm8250_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sm8350-qmp-usb3-phy", - .data = &sm8350_usb3phy_cfg, - }, { - .compatible = "qcom,sm8350-qmp-usb3-uni-phy", - .data = &sm8350_usb3_uniphy_cfg, - }, { - .compatible = "qcom,sm8450-qmp-usb3-phy", - .data = &sm8350_usb3phy_cfg, - }, - { }, -}; -MODULE_DEVICE_TABLE(of, qmp_usb_of_match_table); - static const struct dev_pm_ops qmp_usb_pm_ops = { SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend, qmp_usb_runtime_resume, NULL) @@ -2665,6 +2602,69 @@ static int qmp_usb_probe(struct platform_device *pdev) return ret; } +static const struct of_device_id qmp_usb_of_match_table[] = { + { + .compatible = "qcom,ipq6018-qmp-usb3-phy", + .data = &ipq8074_usb3phy_cfg, + }, { + .compatible = "qcom,ipq8074-qmp-usb3-phy", + .data = &ipq8074_usb3phy_cfg, + }, { + .compatible = "qcom,msm8996-qmp-usb3-phy", + .data = &msm8996_usb3phy_cfg, + }, { + .compatible = "qcom,msm8998-qmp-usb3-phy", + .data = &msm8998_usb3phy_cfg, + }, { + .compatible = "qcom,qcm2290-qmp-usb3-phy", + .data = &qcm2290_usb3phy_cfg, + }, { + .compatible = "qcom,sc7180-qmp-usb3-phy", + .data = &sc7180_usb3phy_cfg, + }, { + .compatible = "qcom,sc8180x-qmp-usb3-phy", + .data = &sm8150_usb3phy_cfg, + 
}, { + .compatible = "qcom,sc8280xp-qmp-usb3-uni-phy", + .data = &sc8280xp_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdm845-qmp-usb3-phy", + .data = &qmp_v3_usb3phy_cfg, + }, { + .compatible = "qcom,sdm845-qmp-usb3-uni-phy", + .data = &qmp_v3_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdx55-qmp-usb3-uni-phy", + .data = &sdx55_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sdx65-qmp-usb3-uni-phy", + .data = &sdx65_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sm8150-qmp-usb3-phy", + .data = &sm8150_usb3phy_cfg, + }, { + .compatible = "qcom,sm8150-qmp-usb3-uni-phy", + .data = &sm8150_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-usb3-phy", + .data = &sm8250_usb3phy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-usb3-uni-phy", + .data = &sm8250_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sm8350-qmp-usb3-phy", + .data = &sm8350_usb3phy_cfg, + }, { + .compatible = "qcom,sm8350-qmp-usb3-uni-phy", + .data = &sm8350_usb3_uniphy_cfg, + }, { + .compatible = "qcom,sm8450-qmp-usb3-phy", + .data = &sm8350_usb3phy_cfg, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, qmp_usb_of_match_table); + static struct platform_driver qmp_usb_driver = { .probe = qmp_usb_probe, .driver = { -- GitLab From 9c9beef111a763513545a0ac0c60220fea64b063 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:26 +0200 Subject: [PATCH 124/694] phy: qcom-qmp-usb: move pm ops Move the PM ops structure next to the implementation to keep the driver callbacks grouped. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index ead136c7bd01e..2a4535494d389 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2280,6 +2280,11 @@ static int __maybe_unused qmp_usb_runtime_resume(struct device *dev) return 0; } +static const struct dev_pm_ops qmp_usb_pm_ops = { + SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend, + qmp_usb_runtime_resume, NULL) +}; + static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { struct qcom_qmp *qmp = dev_get_drvdata(dev); @@ -2501,11 +2506,6 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct dev_pm_ops qmp_usb_pm_ops = { - SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend, - qmp_usb_runtime_resume, NULL) -}; - static int qmp_usb_probe(struct platform_device *pdev) { struct qcom_qmp *qmp; -- GitLab From 2a55ec4f0a048e0aa12022b52009d8d8667ee3d3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:27 +0200 Subject: [PATCH 125/694] phy: qcom-qmp-usb: merge driver data The USB QMP PHY driver only manages a single PHY so merge the old qcom_qmp and qmp_phy structures and drop the PHY array. 
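For readers skimming the series, the end point of this change is the common single-PHY driver idiom: one driver state structure allocated in probe() and handed to the PHY callbacks via phy_set_drvdata()/phy_get_drvdata(), with no per-lane array in between. The sketch below only illustrates that pattern; the struct and function names are hypothetical, not the driver's.

    #include <linux/err.h>
    #include <linux/module.h>
    #include <linux/phy/phy.h>
    #include <linux/platform_device.h>
    #include <linux/slab.h>

    /* Hypothetical single-PHY driver state, analogous to the merged structure. */
    struct foo_phy {
            struct device *dev;
            void __iomem *base;
            struct phy *phy;
    };

    static int foo_phy_init(struct phy *phy)
    {
            struct foo_phy *foo = phy_get_drvdata(phy);     /* state comes straight back */

            /* program foo->base here */
            return 0;
    }

    static const struct phy_ops foo_phy_ops = {
            .init = foo_phy_init,
            .owner = THIS_MODULE,
    };

    static int foo_phy_probe(struct platform_device *pdev)
    {
            struct foo_phy *foo;

            foo = devm_kzalloc(&pdev->dev, sizeof(*foo), GFP_KERNEL);
            if (!foo)
                    return -ENOMEM;

            foo->dev = &pdev->dev;
            foo->base = devm_platform_ioremap_resource(pdev, 0);
            if (IS_ERR(foo->base))
                    return PTR_ERR(foo->base);

            foo->phy = devm_phy_create(&pdev->dev, NULL, &foo_phy_ops);
            if (IS_ERR(foo->phy))
                    return PTR_ERR(foo->phy);

            phy_set_drvdata(foo->phy, foo); /* one struct, no phys[] array */
            return 0;
    }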
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 239 ++++++++++-------------- 1 file changed, 96 insertions(+), 143 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 2a4535494d389..4a1c7ac3f7842 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1453,60 +1453,30 @@ struct qmp_phy_cfg { unsigned int pcs_usb_offset; }; -/** - * struct qmp_phy - per-lane phy descriptor - * - * @phy: generic phy - * @cfg: phy specific configuration - * @serdes: iomapped memory space for phy's serdes (i.e. PLL) - * @tx: iomapped memory space for lane's tx - * @rx: iomapped memory space for lane's rx - * @pcs: iomapped memory space for lane's pcs - * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs) - * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs) - * @pcs_misc: iomapped memory space for lane's pcs_misc - * @pcs_usb: iomapped memory space for lane's pcs_usb - * @pipe_clk: pipe clock - * @qmp: QMP phy to which this lane belongs - * @mode: current PHY mode - */ -struct qmp_phy { - struct phy *phy; +struct qmp_usb { + struct device *dev; + const struct qmp_phy_cfg *cfg; + void __iomem *serdes; + void __iomem *pcs; + void __iomem *pcs_misc; + void __iomem *pcs_usb; void __iomem *tx; void __iomem *rx; - void __iomem *pcs; void __iomem *tx2; void __iomem *rx2; - void __iomem *pcs_misc; - void __iomem *pcs_usb; - struct clk *pipe_clk; - struct qcom_qmp *qmp; - enum phy_mode mode; -}; -/** - * struct qcom_qmp - structure holding QMP phy block attributes - * - * @dev: device - * @dp_com: iomapped memory space for phy's dp_com control block - * - * @clks: array of clocks required by phy - * @resets: array of resets required by phy - * @vregs: regulator supplies bulk data - * - * @phys: array of per-lane phy descriptors - */ -struct qcom_qmp { - struct device *dev; void __iomem *dp_com; + struct clk *pipe_clk; struct clk_bulk_data *clks; struct reset_control_bulk_data *resets; struct regulator_bulk_data *vregs; - struct qmp_phy **phys; + enum phy_mode mode; + + struct phy *phy; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -1976,10 +1946,10 @@ static void qmp_usb_configure(void __iomem *base, qmp_usb_configure_lane(base, tbl, num, 0xff); } -static int qmp_usb_serdes_init(struct qmp_phy *qphy) +static int qmp_usb_serdes_init(struct qmp_usb *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->serdes; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->serdes; const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; @@ -1990,10 +1960,9 @@ static int qmp_usb_serdes_init(struct qmp_phy *qphy) static int qmp_usb_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; + struct qmp_usb *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs = qmp->pcs; void __iomem *dp_com = qmp->dp_com; int ret; @@ -2056,9 +2025,8 @@ static int qmp_usb_init(struct phy *phy) static int qmp_usb_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; 
+ struct qmp_usb *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; reset_control_bulk_assert(cfg->num_resets, qmp->resets); @@ -2071,19 +2039,18 @@ static int qmp_usb_exit(struct phy *phy) static int qmp_usb_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; - void __iomem *pcs = qphy->pcs; + struct qmp_usb *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; + void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int val; int ret; - qmp_usb_serdes_init(qphy); + qmp_usb_serdes_init(qmp); - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret); return ret; @@ -2093,12 +2060,12 @@ static int qmp_usb_power_on(struct phy *phy) qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_usb_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_usb_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + qmp_usb_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); @@ -2122,27 +2089,27 @@ static int qmp_usb_power_on(struct phy *phy) return 0; err_disable_pipe_clk: - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); return ret; } static int qmp_usb_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_usb *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); /* PHY reset */ - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -2175,22 +2142,22 @@ static int qmp_usb_disable(struct phy *phy) static int qmp_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_usb *qmp = phy_get_drvdata(phy); - qphy->mode = mode; + qmp->mode = mode; return 0; } -static void qmp_usb_enable_autonomous_mode(struct qmp_phy *qphy) +static void qmp_usb_enable_autonomous_mode(struct qmp_usb *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; u32 intr_mask; - if (qphy->mode == PHY_MODE_USB_HOST_SS || - qphy->mode == PHY_MODE_USB_DEVICE_SS) + if (qmp->mode == PHY_MODE_USB_HOST_SS || + qmp->mode == PHY_MODE_USB_DEVICE_SS) intr_mask = ARCVR_DTCT_EN | ALFPS_DTCT_EN; else intr_mask = ARCVR_DTCT_EN | ARCVR_DTCT_EVENT_SEL; @@ -2211,11 +2178,11 @@ static void 
qmp_usb_enable_autonomous_mode(struct qmp_phy *qphy) qphy_clrbits(pcs_misc, QPHY_V3_PCS_MISC_CLAMP_ENABLE, CLAMP_EN); } -static void qmp_usb_disable_autonomous_mode(struct qmp_phy *qphy) +static void qmp_usb_disable_autonomous_mode(struct qmp_usb *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; /* Disable i/o clamp_n on resume for normal mode */ if (pcs_misc) @@ -2231,20 +2198,19 @@ static void qmp_usb_disable_autonomous_mode(struct qmp_phy *qphy) static int __maybe_unused qmp_usb_runtime_suspend(struct device *dev) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->phys[0]; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; - dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode); + dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qmp->mode); - if (!qphy->phy->init_count) { + if (!qmp->phy->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; } - qmp_usb_enable_autonomous_mode(qphy); + qmp_usb_enable_autonomous_mode(qmp); - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); return 0; @@ -2252,14 +2218,13 @@ static int __maybe_unused qmp_usb_runtime_suspend(struct device *dev) static int __maybe_unused qmp_usb_runtime_resume(struct device *dev) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->phys[0]; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret = 0; - dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode); + dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qmp->mode); - if (!qphy->phy->init_count) { + if (!qmp->phy->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; } @@ -2268,14 +2233,14 @@ static int __maybe_unused qmp_usb_runtime_resume(struct device *dev) if (ret) return ret; - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(dev, "pipe_clk enable failed, err=%d\n", ret); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); return ret; } - qmp_usb_disable_autonomous_mode(qphy); + qmp_usb_disable_autonomous_mode(qmp); return 0; } @@ -2287,7 +2252,7 @@ static const struct dev_pm_ops qmp_usb_pm_ops = { static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_usb *qmp = dev_get_drvdata(dev); int num = cfg->num_vregs; int i; @@ -2303,7 +2268,7 @@ static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_usb *qmp = dev_get_drvdata(dev); int i; int ret; @@ -2324,7 +2289,7 @@ static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) static int qmp_usb_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_usb *qmp = dev_get_drvdata(dev); int num = cfg->num_clks; int i; @@ -2361,7 +2326,7 @@ static void phy_clk_release_provider(void *res) * clk | +-------+ | +-----+ * +---------------+ */ -static int 
phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) +static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np) { struct clk_fixed_rate *fixed; struct clk_init_data init = { }; @@ -2420,13 +2385,11 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, return devm_of_iomap(dev, np, index, NULL); } -static -int qmp_usb_create(struct device *dev, struct device_node *np, int id, +static int qmp_usb_create(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_usb *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; bool exclusive = true; int ret; @@ -2439,81 +2402,75 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id, if (of_device_is_compatible(dev->of_node, "qcom,sm8350-qmp-usb3-uni-phy")) exclusive = false; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; - - qphy->cfg = cfg; - qphy->serdes = serdes; + qmp->cfg = cfg; + qmp->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. */ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qmp->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->tx)) + return PTR_ERR(qmp->tx); - qphy->rx = devm_of_iomap(dev, np, 1, NULL); - if (IS_ERR(qphy->rx)) - return PTR_ERR(qphy->rx); + qmp->rx = devm_of_iomap(dev, np, 1, NULL); + if (IS_ERR(qmp->rx)) + return PTR_ERR(qmp->rx); - qphy->pcs = qmp_usb_iomap(dev, np, 2, exclusive); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qmp->pcs = qmp_usb_iomap(dev, np, 2, exclusive); + if (IS_ERR(qmp->pcs)) + return PTR_ERR(qmp->pcs); if (cfg->pcs_usb_offset) - qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset; + qmp->pcs_usb = qmp->pcs + cfg->pcs_usb_offset; if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qmp->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->tx2)) + return PTR_ERR(qmp->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); + qmp->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qmp->rx2)) + return PTR_ERR(qmp->rx2); - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL); } else { - qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL); } - if (IS_ERR(qphy->pcs_misc)) { + if (IS_ERR(qmp->pcs_misc)) { dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); - qphy->pcs_misc = NULL; + qmp->pcs_misc = NULL; } - qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL); - if (IS_ERR(qphy->pipe_clk)) { - return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), - "failed to get lane%d pipe clock\n", id); + qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get pipe clock\n"); } generic_phy = devm_phy_create(dev, np, &qmp_usb_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create qphy %d\n", ret); + dev_err(dev, "failed to create PHY: %d\n", ret); return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->phy = 
generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } static int qmp_usb_probe(struct platform_device *pdev) { - struct qcom_qmp *qmp; struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; + struct qmp_usb *qmp; int num, id; int ret; @@ -2555,10 +2512,6 @@ static int qmp_usb_probe(struct platform_device *pdev) if (num > 1) return -EINVAL; - qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL); - if (!qmp->phys) - return -ENOMEM; - pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) @@ -2572,7 +2525,7 @@ static int qmp_usb_probe(struct platform_device *pdev) id = 0; for_each_available_child_of_node(dev->of_node, child) { /* Create per-lane phy */ - ret = qmp_usb_create(dev, child, id, serdes, cfg); + ret = qmp_usb_create(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); -- GitLab From 8ec02ba8493639c721fc63ed05c06891061ef9b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:28 +0200 Subject: [PATCH 126/694] phy: qcom-qmp-usb: clean up device-tree parsing Since the QMP driver split there will be at most a single child node so drop the obsolete iteration construct. While at it, drop the verbose error logging that would have been printed also on probe deferrals. Note that there's no need to check if there are additional child nodes (the kernel is not a devicetree validator), but let's return an error if there are no child nodes at all for now. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 40 ++++++++----------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 4a1c7ac3f7842..05ceab23258a1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2471,7 +2471,6 @@ static int qmp_usb_probe(struct platform_device *pdev) void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; struct qmp_usb *qmp; - int num, id; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -2507,44 +2506,29 @@ static int qmp_usb_probe(struct platform_device *pdev) if (ret) return ret; - num = of_get_available_child_count(dev->of_node); - /* do we have a rogue child node ? */ - if (num > 1) + child = of_get_next_available_child(dev->of_node, NULL); + if (!child) return -EINVAL; pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) - return ret; + goto err_node_put; /* * Prevent runtime pm from being ON by default. Users can enable * it using power/control in sysfs. */ pm_runtime_forbid(dev); - id = 0; - for_each_available_child_of_node(dev->of_node, child) { - /* Create per-lane phy */ - ret = qmp_usb_create(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } - - /* - * Register the pipe clock provided by phy. - * See function description to see details of this pipe clock. 
- */ - ret = phy_pipe_clk_register(qmp, child); - if (ret) { - dev_err(qmp->dev, - "failed to register pipe clock source\n"); - goto err_node_put; - } - - id++; - } + ret = qmp_usb_create(dev, child, serdes, cfg); + if (ret) + goto err_node_put; + + ret = phy_pipe_clk_register(qmp, child); + if (ret) + goto err_node_put; + + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); -- GitLab From 413db06c05e729639e9b64cf7ab5d918b8182006 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:29 +0200 Subject: [PATCH 127/694] phy: qcom-qmp-usb: clean up probe initialisation Stop abusing the driver data pointer and instead pass the driver state structure directly to the initialisation helpers during probe. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 46 ++++++++++++------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 05ceab23258a1..d3c0b994b9395 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2250,9 +2250,10 @@ static const struct dev_pm_ops qmp_usb_pm_ops = { qmp_usb_runtime_resume, NULL) }; -static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_usb_vreg_init(struct qmp_usb *qmp) { - struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_vregs; int i; @@ -2266,9 +2267,10 @@ static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) return devm_regulator_bulk_get(dev, num, qmp->vregs); } -static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_usb_reset_init(struct qmp_usb *qmp) { - struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int i; int ret; @@ -2287,9 +2289,10 @@ static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) return 0; } -static int qmp_usb_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_usb_clk_init(struct qmp_usb *qmp) { - struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_clks; int i; @@ -2385,10 +2388,10 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, return devm_of_iomap(dev, np, index, NULL); } -static int qmp_usb_create(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np) { - struct qmp_usb *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; struct phy *generic_phy; bool exclusive = true; int ret; @@ -2402,8 +2405,6 @@ static int qmp_usb_create(struct device *dev, struct device_node *np, if (of_device_is_compatible(dev->of_node, "qcom,sm8350-qmp-usb3-uni-phy")) exclusive = false; - qmp->cfg = cfg; - qmp->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. 
@@ -2468,8 +2469,6 @@ static int qmp_usb_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; - void __iomem *serdes; - const struct qmp_phy_cfg *cfg = NULL; struct qmp_usb *qmp; int ret; @@ -2478,31 +2477,30 @@ static int qmp_usb_probe(struct platform_device *pdev) return -ENOMEM; qmp->dev = dev; - dev_set_drvdata(dev, qmp); - cfg = of_device_get_match_data(dev); - if (!cfg) + qmp->cfg = of_device_get_match_data(dev); + if (!qmp->cfg) return -EINVAL; - serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(serdes)) - return PTR_ERR(serdes); + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); - if (cfg->has_phy_dp_com_ctrl) { + if (qmp->cfg->has_phy_dp_com_ctrl) { qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(qmp->dp_com)) return PTR_ERR(qmp->dp_com); } - ret = qmp_usb_clk_init(dev, cfg); + ret = qmp_usb_clk_init(qmp); if (ret) return ret; - ret = qmp_usb_reset_init(dev, cfg); + ret = qmp_usb_reset_init(qmp); if (ret) return ret; - ret = qmp_usb_vreg_init(dev, cfg); + ret = qmp_usb_vreg_init(qmp); if (ret) return ret; @@ -2520,7 +2518,7 @@ static int qmp_usb_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_usb_create(dev, child, serdes, cfg); + ret = qmp_usb_create(qmp, child); if (ret) goto err_node_put; -- GitLab From 8fe2b2b745a123640ead94743cf28c380fec9b58 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:30 +0200 Subject: [PATCH 128/694] phy: qcom-qmp-usb: rename PHY ops structure Rename the PHY operation structure so that it has a "phy_ops" suffix and move it next to the implementation. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index d3c0b994b9395..0158399920b86 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2149,6 +2149,13 @@ static int qmp_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } +static const struct phy_ops qmp_usb_phy_ops = { + .init = qmp_usb_enable, + .exit = qmp_usb_disable, + .set_mode = qmp_usb_set_mode, + .owner = THIS_MODULE, +}; + static void qmp_usb_enable_autonomous_mode(struct qmp_usb *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -2366,13 +2373,6 @@ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static const struct phy_ops qmp_usb_ops = { - .init = qmp_usb_enable, - .exit = qmp_usb_disable, - .set_mode = qmp_usb_set_mode, - .owner = THIS_MODULE, -}; - static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, int index, bool exclusive) { @@ -2451,7 +2451,7 @@ static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_usb_ops); + generic_phy = devm_phy_create(dev, np, &qmp_usb_phy_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); dev_err(dev, "failed to create PHY: %d\n", ret); -- GitLab From 876420fb7b98934a4f78d8976c7ff095a13c90b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:31 +0200 
Subject: [PATCH 129/694] phy: qcom-qmp-usb: clean up PHY init Clean up the PHY initialisation somewhat by programming both tx and rx for the second lane after the first lane. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 0158399920b86..cf1e04e9daf7a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2058,14 +2058,12 @@ static int qmp_usb_power_on(struct phy *phy) /* Tx, Rx, and PCS configurations */ qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - if (cfg->lanes >= 2) + if (cfg->lanes >= 2) { + qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_usb_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + } qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); -- GitLab From 500e9d37fb9ead52498af1b035933c62a487efe8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:32 +0200 Subject: [PATCH 130/694] dt-bindings: phy: qcom,qmp-usb: rename current bindings The current QMP USB PHY bindings are based on the original MSM8996 binding which provided multiple PHYs per IP block and these in turn were described by child nodes. Later QMP USB PHY blocks only provide a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. they do not include the per-lane PCS registers). In preparation for adding new bindings for SC8280XP which further bindings can be based on, rename the current bindings after MSM8996 and add a reference to the SC8280XP bindings.
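The rename only moves documentation; the driver keeps keying its per-SoC configuration off the same compatible strings through its of_device_id table. A trimmed, hypothetical sketch of that link (the cfg struct here is a stand-in, not the driver's full configuration structure):

    #include <linux/module.h>
    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    struct foo_cfg {
            int lanes;
    };

    static const struct foo_cfg msm8996_usb3phy_cfg = {
            .lanes = 1,
    };

    static const struct of_device_id foo_usb_of_match[] = {
            { .compatible = "qcom,msm8996-qmp-usb3-phy", .data = &msm8996_usb3phy_cfg },
            { /* sentinel */ }
    };
    MODULE_DEVICE_TABLE(of, foo_usb_of_match);

    static int foo_usb_probe(struct platform_device *pdev)
    {
            /* Returns the .data pointer of the matched compatible entry. */
            const struct foo_cfg *cfg = of_device_get_match_data(&pdev->dev);

            if (!cfg)
                    return -EINVAL;
            return 0;
    }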
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- ...com,qmp-usb-phy.yaml => qcom,msm8996-qmp-usb3-phy.yaml} | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename Documentation/devicetree/bindings/phy/{qcom,qmp-usb-phy.yaml => qcom,msm8996-qmp-usb3-phy.yaml} (97%) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml similarity index 97% rename from Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml rename to Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml index 7acb4b7de7f94..58ac84de8eee5 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml @@ -1,10 +1,10 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/phy/qcom,qmp-usb-phy.yaml# +$id: http://devicetree.org/schemas/phy/qcom,msm8996-qmp-usb3-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm QMP PHY controller (USB) +title: Qualcomm QMP PHY controller (USB, MSM8996) maintainers: - Vinod Koul @@ -13,6 +13,9 @@ description: QMP PHY controller supports physical layer functionality for a number of controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see + qcom,sc8280xp-qmp-usb3-uni-phy.yaml. + properties: compatible: enum: -- GitLab From e8e58e29a0c9310a917448d0c4a1857f0dbfd917 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:33 +0200 Subject: [PATCH 131/694] dt-bindings: phy: qcom,qmp-usb: fix sc8280xp binding The current QMP USB PHY bindings are based on the original MSM8996 PCIe PHY binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP USB PHY block only provides a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. they do not include the per-lane PCS registers). Note that the PCS_USB region is also not described by the current bindings despite being used by the driver, and this has led to people increasing the size of the PCS region in the devicetree so that it includes PCS_USB registers even though other regions like TX and RX may lie in between. Add a new binding for the QMP USB PHYs found on SC8280XP which further bindings can be based on. Note that this also fixes the SC8280XP "phy_phy" reset name. Also note that the current binding is simply removed instead of being deprecated as it was only recently merged and support for SC8280XP is still under development. And, specifically, there is no support in mainline for the multiport controller that uses these PHYs.
Signed-off-by: Johan Hovold Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221028160435.26948-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qcom,msm8996-qmp-usb3-phy.yaml | 13 --- .../phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml | 105 ++++++++++++++++++ 2 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml index 58ac84de8eee5..0c6b3ba7346b9 100644 --- a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml @@ -26,7 +26,6 @@ properties: - qcom,qcm2290-qmp-usb3-phy - qcom,sc7180-qmp-usb3-phy - qcom,sc8180x-qmp-usb3-phy - - qcom,sc8280xp-qmp-usb3-uni-phy - qcom,sdm845-qmp-usb3-phy - qcom,sdm845-qmp-usb3-uni-phy - qcom,sdx55-qmp-usb3-uni-phy @@ -204,7 +203,6 @@ allOf: compatible: contains: enum: - - qcom,sc8280xp-qmp-usb3-uni-phy - qcom,sm8150-qmp-usb3-phy - qcom,sm8150-qmp-usb3-uni-phy - qcom,sm8250-qmp-usb3-uni-phy @@ -271,16 +269,6 @@ allOf: - const: phy_phy - const: phy - - if: - properties: - compatible: - contains: - enum: - - qcom,sc8280xp-qmp-usb3-uni-phy - then: - required: - - power-domains - - if: properties: compatible: @@ -352,7 +340,6 @@ allOf: contains: enum: - qcom,msm8996-qmp-usb3-phy - - qcom,sc8280xp-qmp-usb3-uni-phy - qcom,sm8250-qmp-usb3-uni-phy - qcom,sm8350-qmp-usb3-uni-phy then: diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml new file mode 100644 index 0000000000000..ef080509747ae --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm QMP PHY controller (USB, SC8280XP) + +maintainers: + - Vinod Koul + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. 
+ +properties: + compatible: + enum: + - qcom,sc8280xp-qmp-usb3-uni-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 5 + + clock-names: + items: + - const: aux + - const: ref_clk_src + - const: ref + - const: com_aux + - const: pipe + + power-domains: + maxItems: 1 + + resets: + maxItems: 2 + + reset-names: + items: + - const: phy + - const: phy_phy + + vdda-phy-supply: true + + vdda-pll-supply: true + + "#clock-cells": + const: 0 + + clock-output-names: + maxItems: 1 + + "#phy-cells": + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + - resets + - reset-names + - vdda-phy-supply + - vdda-pll-supply + - "#clock-cells" + - clock-output-names + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + #include + + phy@88ef000 { + compatible = "qcom,sc8280xp-qmp-usb3-uni-phy"; + reg = <0x088ef000 0x2000>; + + clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>, + <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_USB3_MP0_CLKREF_CLK>, + <&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>, + <&gcc GCC_USB3_MP_PHY_PIPE_0_CLK>; + clock-names = "aux", "ref_clk_src", "ref", "com_aux", + "pipe"; + + power-domains = <&gcc USB30_MP_GDSC>; + + resets = <&gcc GCC_USB3_UNIPHY_MP0_BCR>, + <&gcc GCC_USB3UNIPHY_PHY_MP0_BCR>; + reset-names = "phy", "phy_phy"; + + vdda-phy-supply = <&vreg_l3a>; + vdda-pll-supply = <&vreg_l5a>; + + #clock-cells = <0>; + clock-output-names = "usb2_phy0_pipe_clk"; + + #phy-cells = <0>; + }; -- GitLab From 183462e8c92cce5cbfabecc1719fb61c61b70833 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:34 +0200 Subject: [PATCH 132/694] phy: qcom-qmp-usb: restructure PHY creation In preparation for supporting devicetree bindings which do not use a child node, move the PHY creation to probe() proper and parse the serdes and dp_com resources in what is now the legacy devicetree helper. 
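For orientation, the probe() flow the next patches converge on distinguishes the legacy child-node binding from the new flat binding before any resources are mapped. This is a simplified, hypothetical sketch with stubbed helpers, not the driver's final code:

    #include <linux/of.h>
    #include <linux/platform_device.h>

    /* Stubbed parse helpers standing in for the real legacy/new DT parsers. */
    static int foo_parse_dt_legacy(struct device *dev, struct device_node *np)
    {
            return 0;
    }

    static int foo_parse_dt(struct device *dev)
    {
            return 0;
    }

    static int foo_probe(struct platform_device *pdev)
    {
            struct device *dev = &pdev->dev;
            struct device_node *np;
            int ret;

            /* Legacy binding: the PHY resources hang off a child node. */
            np = of_get_next_available_child(dev->of_node, NULL);
            if (np) {
                    ret = foo_parse_dt_legacy(dev, np);
            } else {
                    /* New binding: the controller node itself carries everything. */
                    np = of_node_get(dev->of_node);
                    ret = foo_parse_dt(dev);
            }

            of_node_put(np);
            return ret;
    }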
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 46 ++++++++++++------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index cf1e04e9daf7a..fa87e90a821eb 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2386,13 +2386,22 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, return devm_of_iomap(dev, np, index, NULL); } -static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np) +static int qmp_usb_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np) { + struct platform_device *pdev = to_platform_device(qmp->dev); const struct qmp_phy_cfg *cfg = qmp->cfg; struct device *dev = qmp->dev; - struct phy *generic_phy; bool exclusive = true; - int ret; + + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); + + if (cfg->has_phy_dp_com_ctrl) { + qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(qmp->dp_com)) + return PTR_ERR(qmp->dp_com); + } /* * FIXME: These bindings should be fixed to not rely on overlapping @@ -2449,16 +2458,6 @@ static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_usb_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create PHY: %d\n", ret); - return ret; - } - - qmp->phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); - return 0; } @@ -2480,16 +2479,6 @@ static int qmp_usb_probe(struct platform_device *pdev) if (!qmp->cfg) return -EINVAL; - qmp->serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qmp->serdes)) - return PTR_ERR(qmp->serdes); - - if (qmp->cfg->has_phy_dp_com_ctrl) { - qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(qmp->dp_com)) - return PTR_ERR(qmp->dp_com); - } - ret = qmp_usb_clk_init(qmp); if (ret) return ret; @@ -2516,7 +2505,7 @@ static int qmp_usb_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_usb_create(qmp, child); + ret = qmp_usb_parse_dt_legacy(qmp, child); if (ret) goto err_node_put; @@ -2524,6 +2513,15 @@ static int qmp_usb_probe(struct platform_device *pdev) if (ret) goto err_node_put; + qmp->phy = devm_phy_create(dev, child, &qmp_usb_phy_ops); + if (IS_ERR(qmp->phy)) { + ret = PTR_ERR(qmp->phy); + dev_err(dev, "failed to create PHY: %d\n", ret); + goto err_node_put; + } + + phy_set_drvdata(qmp->phy, qmp); + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); -- GitLab From c0a6c25283672facaa57cb3daad71c6586736312 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 28 Oct 2022 18:04:35 +0200 Subject: [PATCH 133/694] phy: qcom-qmp-usb: add support for updated sc8280xp binding Add support for the new SC8280XP binding. Note that the binding does not try to describe every register subregion and instead the driver holds the corresponding offsets. This includes the PCS_USB region which was initially overlooked. Note that the driver will no longer accept the old binding due to the fixed "phy_phy" reset name. 
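A compact way to picture the offsets approach described above: the new binding exposes a single register region, and the driver derives each sub-block from fixed per-generation offsets. The offset values below are the v5 values added by this patch; the surrounding names are hypothetical.

    #include <linux/err.h>
    #include <linux/platform_device.h>
    #include <linux/types.h>

    /* Per-generation offsets into the single "reg" window (v5 values). */
    struct foo_offsets {
            u16 serdes;
            u16 pcs;
            u16 pcs_usb;
            u16 tx;
            u16 rx;
    };

    static const struct foo_offsets foo_offsets_v5 = {
            .serdes = 0,
            .pcs = 0x0200,
            .pcs_usb = 0x1200,
            .tx = 0x0e00,
            .rx = 0x1000,
    };

    static void __iomem *foo_map_pcs(struct platform_device *pdev)
    {
            void __iomem *base;

            /* One reg entry from the new binding; sub-regions are derived from it. */
            base = devm_platform_ioremap_resource(pdev, 0);
            if (IS_ERR(base))
                    return base;

            return base + foo_offsets_v5.pcs;
    }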
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221028160435.26948-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 82 ++++++++++++++++++++----- 1 file changed, 67 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index fa87e90a821eb..55029ea63f731 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1414,10 +1414,20 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +struct qmp_usb_offsets { + u16 serdes; + u16 pcs; + u16 pcs_usb; + u16 tx; + u16 rx; +}; + /* struct qmp_phy_cfg - per-PHY initialization config */ struct qmp_phy_cfg { int lanes; + const struct qmp_usb_offsets *offsets; + /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; int serdes_tbl_num; @@ -1548,6 +1558,14 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; +static const struct qmp_usb_offsets qmp_usb_offsets_v5 = { + .serdes = 0, + .pcs = 0x0200, + .pcs_usb = 0x1200, + .tx = 0x0e00, + .rx = 0x1000, +}; + static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, @@ -1637,6 +1655,8 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .lanes = 1, + .offsets = &qmp_usb_offsets_v5, + .serdes_tbl = sc8280xp_usb3_uniphy_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_serdes_tbl), .tx_tbl = sc8280xp_usb3_uniphy_tx_tbl, @@ -1647,12 +1667,11 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .reset_list = qcm2290_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(qcm2290_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, - .pcs_usb_offset = 0x1000, }; static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = { @@ -2461,11 +2480,41 @@ static int qmp_usb_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np) return 0; } +static int qmp_usb_parse_dt(struct qmp_usb *qmp) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_usb_offsets *offs = cfg->offsets; + struct device *dev = qmp->dev; + void __iomem *base; + + if (!offs) + return -EINVAL; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + qmp->serdes = base + offs->serdes; + qmp->pcs = base + offs->pcs; + qmp->pcs_usb = base + offs->pcs_usb; + qmp->tx = base + offs->tx; + qmp->rx = base + offs->rx; + + qmp->pipe_clk = devm_clk_get(dev, "pipe"); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get pipe clock\n"); + } + + return 0; +} + static int qmp_usb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *child; struct phy_provider *phy_provider; + struct device_node *np; struct qmp_usb *qmp; int ret; @@ -2491,9 +2540,16 @@ static int qmp_usb_probe(struct platform_device *pdev) if (ret) return ret; - child = of_get_next_available_child(dev->of_node, NULL); - if (!child) - return -EINVAL; + /* Check for legacy 
binding with child node. */ + np = of_get_next_available_child(dev->of_node, NULL); + if (np) { + ret = qmp_usb_parse_dt_legacy(qmp, np); + } else { + np = of_node_get(dev->of_node); + ret = qmp_usb_parse_dt(qmp); + } + if (ret) + goto err_node_put; pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) @@ -2505,15 +2561,11 @@ static int qmp_usb_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_usb_parse_dt_legacy(qmp, child); - if (ret) - goto err_node_put; - - ret = phy_pipe_clk_register(qmp, child); + ret = phy_pipe_clk_register(qmp, np); if (ret) goto err_node_put; - qmp->phy = devm_phy_create(dev, child, &qmp_usb_phy_ops); + qmp->phy = devm_phy_create(dev, np, &qmp_usb_phy_ops); if (IS_ERR(qmp->phy)) { ret = PTR_ERR(qmp->phy); dev_err(dev, "failed to create PHY: %d\n", ret); @@ -2522,14 +2574,14 @@ static int qmp_usb_probe(struct platform_device *pdev) phy_set_drvdata(qmp->phy, qmp); - of_node_put(child); + of_node_put(np); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); err_node_put: - of_node_put(child); + of_node_put(np); return ret; } -- GitLab From 32fb07f35675c4c3311ae370471ee1ae6cc3e694 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:13 -0700 Subject: [PATCH 134/694] phy: usb: Improve port mode selection Split port modes into two different variables. "Supported port modes" describes what the hardware supports, while "port mode" describes how the hardware is currently configured and can be changed dynamically through sysfs. We initialize all supported port modes on init, even if a given port mode is not currently selected, because we cannot know whether the downstream interface from the PHY will be active. This also fixes an issue where port modes selected via sysfs were not being saved across suspend/resume.
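In other words, the hardware capability and the current selection become two separate fields; only the latter is touched by the sysfs store and re-applied on resume. A hypothetical sketch of the split (the names are not the driver's):

    /* Hypothetical split between probed capability and current selection. */
    struct foo_init_params {
            int supported_port_modes;       /* fixed at probe from DT/hardware */
            int port_mode;                  /* current selection, runtime-changeable */
    };

    static void foo_apply_port_mode(struct foo_init_params *ini)
    {
            /* Program the PORT_MODE register field from ini->port_mode. */
    }

    static void foo_select_port_mode(struct foo_init_params *ini, int mode)
    {
            /* Only the selection changes; the supported set stays as probed. */
            ini->port_mode = mode;
            foo_apply_port_mode(ini);
    }

    static void foo_resume(struct foo_init_params *ini)
    {
            /* Re-applying the saved selection keeps sysfs choices across suspend. */
            foo_apply_port_mode(ini);
    }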
Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-2-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 12 ++++++------ drivers/phy/broadcom/phy-brcm-usb-init.c | 10 +++++----- drivers/phy/broadcom/phy-brcm-usb-init.h | 10 +++++----- drivers/phy/broadcom/phy-brcm-usb.c | 14 +++++++++----- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index d2524b70ea161..430a8ae0cd24b 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -195,10 +195,10 @@ static void usb_init_common(struct brcm_usb_init_params *params) if (USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE)) { reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); reg &= ~USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE); - reg |= params->mode; + reg |= params->port_mode; brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); } - switch (params->mode) { + switch (params->supported_port_modes) { case USB_CTLR_MODE_HOST: USB_CTRL_UNSET(ctrl, USB_PM, BDC_SOFT_RESETB); break; @@ -276,7 +276,7 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params) /* Set the PHY_MODE */ reg = brcm_usb_readl(usb_phy + USB_PHY_UTMI_CTL_1); reg &= ~USB_PHY_UTMI_CTL_1_PHY_MODE_MASK; - reg |= params->mode << USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT; + reg |= params->supported_port_modes << USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT; brcm_usb_writel(reg, usb_phy + USB_PHY_UTMI_CTL_1); usb_init_common(params); @@ -286,7 +286,7 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params) * the default "Read Transaction Size" of 6 (1024 bytes). * Set it to 4 (256 bytes). 
*/ - if ((params->mode != USB_CTLR_MODE_HOST) && bdc_ec) { + if ((params->supported_port_modes != USB_CTLR_MODE_HOST) && bdc_ec) { reg = brcm_usb_readl(bdc_ec + BDC_EC_AXIRDA); reg &= ~BDC_EC_AXIRDA_RTS_MASK; reg |= (0x4 << BDC_EC_AXIRDA_RTS_SHIFT); @@ -385,7 +385,7 @@ static int usb_get_dual_select(struct brcm_usb_init_params *params) return reg; } -static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode) +static void usb_set_dual_select(struct brcm_usb_init_params *params) { void __iomem *ctrl = params->regs[BRCM_REGS_CTRL]; u32 reg; @@ -394,7 +394,7 @@ static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode) reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); reg &= ~USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE); - reg |= mode; + reg |= params->port_mode; brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); } diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c index dddcbd3cd5f3d..a7f8b3d3264d5 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init.c @@ -876,11 +876,11 @@ static void usb_init_common(struct brcm_usb_init_params *params) reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); reg &= ~USB_CTRL_MASK_FAMILY(params, USB_DEVICE_CTL1, PORT_MODE); - reg |= params->mode; + reg |= params->port_mode; brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); } if (USB_CTRL_MASK_FAMILY(params, USB_PM, BDC_SOFT_RESETB)) { - switch (params->mode) { + switch (params->supported_port_modes) { case USB_CTLR_MODE_HOST: USB_CTRL_UNSET_FAMILY(params, USB_PM, BDC_SOFT_RESETB); break; @@ -891,7 +891,7 @@ static void usb_init_common(struct brcm_usb_init_params *params) } } if (USB_CTRL_MASK_FAMILY(params, SETUP, CC_DRD_MODE_ENABLE)) { - if (params->mode == USB_CTLR_MODE_TYPEC_PD) + if (params->supported_port_modes == USB_CTLR_MODE_TYPEC_PD) USB_CTRL_SET_FAMILY(params, SETUP, CC_DRD_MODE_ENABLE); else USB_CTRL_UNSET_FAMILY(params, SETUP, @@ -1000,7 +1000,7 @@ static int usb_get_dual_select(struct brcm_usb_init_params *params) return reg; } -static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode) +static void usb_set_dual_select(struct brcm_usb_init_params *params) { void __iomem *ctrl = params->regs[BRCM_REGS_CTRL]; u32 reg; @@ -1011,7 +1011,7 @@ static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode) reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); reg &= ~USB_CTRL_MASK_FAMILY(params, USB_DEVICE_CTL1, PORT_MODE); - reg |= mode; + reg |= params->port_mode; brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1)); } } diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h index 1ccb5ddab865c..bedf2b8e2f196 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.h +++ b/drivers/phy/broadcom/phy-brcm-usb-init.h @@ -45,14 +45,15 @@ struct brcm_usb_init_ops { void (*uninit_eohci)(struct brcm_usb_init_params *params); void (*uninit_xhci)(struct brcm_usb_init_params *params); int (*get_dual_select)(struct brcm_usb_init_params *params); - void (*set_dual_select)(struct brcm_usb_init_params *params, int mode); + void (*set_dual_select)(struct brcm_usb_init_params *params); }; struct brcm_usb_init_params { void __iomem *regs[BRCM_REGS_MAX]; int ioc; int ipp; - int mode; + int supported_port_modes; + int port_mode; u32 family_id; u32 product_id; int selected_family; @@ -153,11 +154,10 @@ static inline int brcm_usb_get_dual_select(struct brcm_usb_init_params *ini) return 
0; } -static inline void brcm_usb_set_dual_select(struct brcm_usb_init_params *ini, - int mode) +static inline void brcm_usb_set_dual_select(struct brcm_usb_init_params *ini) { if (ini->ops->set_dual_select) - ini->ops->set_dual_select(ini, mode); + ini->ops->set_dual_select(ini); } #endif /* _USB_BRCM_COMMON_INIT_H */ diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index 2cb3779fcdf82..99d4deabfd976 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -233,7 +233,7 @@ static ssize_t dr_mode_show(struct device *dev, return sprintf(buf, "%s\n", value_to_name(&brcm_dr_mode_to_name[0], ARRAY_SIZE(brcm_dr_mode_to_name), - priv->ini.mode)); + priv->ini.supported_port_modes)); } static DEVICE_ATTR_RO(dr_mode); @@ -249,7 +249,8 @@ static ssize_t dual_select_store(struct device *dev, res = name_to_value(&brcm_dual_mode_to_name[0], ARRAY_SIZE(brcm_dual_mode_to_name), buf, &value); if (!res) { - brcm_usb_set_dual_select(&priv->ini, value); + priv->ini.port_mode = value; + brcm_usb_set_dual_select(&priv->ini); res = len; } mutex_unlock(&sysfs_lock); @@ -495,13 +496,16 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) of_property_read_u32(dn, "brcm,ipp", &priv->ini.ipp); of_property_read_u32(dn, "brcm,ioc", &priv->ini.ioc); - priv->ini.mode = USB_CTLR_MODE_HOST; + priv->ini.supported_port_modes = USB_CTLR_MODE_HOST; err = of_property_read_string(dn, "dr_mode", &mode); if (err == 0) { name_to_value(&brcm_dr_mode_to_name[0], ARRAY_SIZE(brcm_dr_mode_to_name), - mode, &priv->ini.mode); + mode, &priv->ini.supported_port_modes); } + /* Default port_mode to supported port_modes */ + priv->ini.port_mode = priv->ini.supported_port_modes; + if (of_property_read_bool(dn, "brcm,has-xhci")) priv->has_xhci = true; if (of_property_read_bool(dn, "brcm,has-eohci")) @@ -539,7 +543,7 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) * Create sysfs entries for mode. * Remove "dual_select" attribute if not in dual mode */ - if (priv->ini.mode != USB_CTLR_MODE_DRD) + if (priv->ini.supported_port_modes != USB_CTLR_MODE_DRD) brcm_usb_phy_attrs[1] = NULL; err = sysfs_create_group(&dev->kobj, &brcm_usb_phy_group); if (err) -- GitLab From f7fc5b7090372fc4dd7798c874635ca41b8ba733 Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Wed, 5 Oct 2022 14:30:14 -0700 Subject: [PATCH 135/694] phy: usb: s2 WoL wakeup_count not incremented for USB->Eth devices The PHY's "wakeup_count" does not increment when waking from WoL. The wakeup count can be found in sysfs at: /sys/bus/platform/devices/rdb/*.usb-phy/power/wakeup_count. The problem is that the system wakeup event handler was being passed the wrong "device" by the PHY driver.
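The fix boils down to registering the wake IRQ with the struct device as dev_id and reporting the event against that same device, which is what makes .../power/wakeup_count advance. A minimal sketch of the pattern, with hypothetical names and assuming the usual genirq and PM-wakeup APIs:

    #include <linux/device.h>
    #include <linux/interrupt.h>
    #include <linux/pm_wakeup.h>

    /* Wake ISR: dev_id is the struct device itself, not a PHY handle. */
    static irqreturn_t foo_wake_isr(int irq, void *dev_id)
    {
            struct device *dev = dev_id;

            pm_wakeup_event(dev, 0);        /* increments .../power/wakeup_count */
            return IRQ_HANDLED;
    }

    static int foo_register_wake_irq(struct device *dev, int irq)
    {
            int err;

            /* Pass dev as dev_id so the ISR reports the wakeup on the right device. */
            err = devm_request_irq(dev, irq, foo_wake_isr, 0, dev_name(dev), dev);
            if (err)
                    return err;

            device_set_wakeup_capable(dev, 1);
            return 0;
    }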
Fixes: f1c0db40a3ad ("phy: usb: Add "wake on" functionality") Signed-off-by: Al Cooper Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-3-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index 99d4deabfd976..d97fa58cd63a6 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -102,9 +102,9 @@ static int brcm_pm_notifier(struct notifier_block *notifier, static irqreturn_t brcm_usb_phy_wake_isr(int irq, void *dev_id) { - struct phy *gphy = dev_id; + struct device *dev = dev_id; - pm_wakeup_event(&gphy->dev, 0); + pm_wakeup_event(dev, 0); return IRQ_HANDLED; } @@ -452,7 +452,7 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev, if (priv->wake_irq >= 0) { err = devm_request_irq(dev, priv->wake_irq, brcm_usb_phy_wake_isr, 0, - dev_name(dev), gphy); + dev_name(dev), dev); if (err < 0) return err; device_set_wakeup_capable(dev, 1); -- GitLab From 7e81153d0f16dd7e6f571bd168bc3d8b46f9f5b7 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:15 -0700 Subject: [PATCH 136/694] phy: usb: Migrate to BIT and BITMASK macros Using BIT and BITMASK macros makes it much easier to read and make modifications. Also reordered some constants to be in numerical order. Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-4-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- .../phy/broadcom/phy-brcm-usb-init-synopsys.c | 68 ++++++++-------- drivers/phy/broadcom/phy-brcm-usb-init.c | 80 +++++++++---------- 2 files changed, 74 insertions(+), 74 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index 430a8ae0cd24b..26e9585eca60c 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -18,14 +18,14 @@ #define PIARBCTL_CAM 0x00 #define PIARBCTL_SPLITTER 0x04 #define PIARBCTL_MISC 0x08 -#define PIARBCTL_MISC_SECURE_MASK 0x80000000 -#define PIARBCTL_MISC_USB_SELECT_MASK 0x40000000 -#define PIARBCTL_MISC_USB_4G_SDRAM_MASK 0x20000000 -#define PIARBCTL_MISC_USB_PRIORITY_MASK 0x000f0000 -#define PIARBCTL_MISC_USB_MEM_PAGE_MASK 0x0000f000 -#define PIARBCTL_MISC_CAM1_MEM_PAGE_MASK 0x00000f00 -#define PIARBCTL_MISC_CAM0_MEM_PAGE_MASK 0x000000f0 -#define PIARBCTL_MISC_SATA_PRIORITY_MASK 0x0000000f +#define PIARBCTL_MISC_SATA_PRIORITY_MASK GENMASK(3, 0) +#define PIARBCTL_MISC_CAM0_MEM_PAGE_MASK GENMASK(7, 4) +#define PIARBCTL_MISC_CAM1_MEM_PAGE_MASK GENMASK(11, 8) +#define PIARBCTL_MISC_USB_MEM_PAGE_MASK GENMASK(15, 12) +#define PIARBCTL_MISC_USB_PRIORITY_MASK GENMASK(19, 16) +#define PIARBCTL_MISC_USB_4G_SDRAM_MASK BIT(29) +#define PIARBCTL_MISC_USB_SELECT_MASK BIT(30) +#define PIARBCTL_MISC_SECURE_MASK BIT(31) #define PIARBCTL_MISC_USB_ONLY_MASK \ (PIARBCTL_MISC_USB_SELECT_MASK | \ @@ -35,46 +35,46 @@ /* Register definitions for the USB CTRL block */ #define USB_CTRL_SETUP 0x00 -#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK 0x02000000 -#define USB_CTRL_SETUP_SCB2_EN_MASK 0x00008000 -#define USB_CTRL_SETUP_tca_drv_sel_MASK 0x01000000 -#define USB_CTRL_SETUP_SCB1_EN_MASK 0x00004000 -#define USB_CTRL_SETUP_SOFT_SHUTDOWN_MASK 0x00000200 -#define USB_CTRL_SETUP_IPP_MASK 0x00000020 -#define USB_CTRL_SETUP_IOC_MASK 
0x00000010 +#define USB_CTRL_SETUP_IOC_MASK BIT(4) +#define USB_CTRL_SETUP_IPP_MASK BIT(5) +#define USB_CTRL_SETUP_SOFT_SHUTDOWN_MASK BIT(9) +#define USB_CTRL_SETUP_SCB1_EN_MASK BIT(14) +#define USB_CTRL_SETUP_SCB2_EN_MASK BIT(15) +#define USB_CTRL_SETUP_tca_drv_sel_MASK BIT(24) +#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK BIT(25) #define USB_CTRL_USB_PM 0x04 -#define USB_CTRL_USB_PM_USB_PWRDN_MASK 0x80000000 -#define USB_CTRL_USB_PM_SOFT_RESET_MASK 0x40000000 -#define USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK 0x00800000 -#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK 0x00400000 -#define USB_CTRL_USB_PM_XHC_PME_EN_MASK 0x00000010 -#define USB_CTRL_USB_PM_XHC_S2_CLK_SWITCH_EN_MASK 0x00000008 +#define USB_CTRL_USB_PM_XHC_S2_CLK_SWITCH_EN_MASK BIT(3) +#define USB_CTRL_USB_PM_XHC_PME_EN_MASK BIT(4) +#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK BIT(22) +#define USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK BIT(23) +#define USB_CTRL_USB_PM_SOFT_RESET_MASK BIT(30) +#define USB_CTRL_USB_PM_USB_PWRDN_MASK BIT(31) #define USB_CTRL_USB_PM_STATUS 0x08 #define USB_CTRL_USB_DEVICE_CTL1 0x10 -#define USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK 0x00000003 +#define USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK GENMASK(1, 0) #define USB_CTRL_TEST_PORT_CTL 0x30 -#define USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_MASK 0x000000ff +#define USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_MASK GENMASK(7, 0) #define USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_PME_GEN_MASK 0x0000002e #define USB_CTRL_TP_DIAG1 0x34 -#define USB_CTLR_TP_DIAG1_wake_MASK 0x00000002 +#define USB_CTLR_TP_DIAG1_wake_MASK BIT(1) #define USB_CTRL_CTLR_CSHCR 0x50 -#define USB_CTRL_CTLR_CSHCR_ctl_pme_en_MASK 0x00040000 +#define USB_CTRL_CTLR_CSHCR_ctl_pme_en_MASK BIT(18) /* Register definitions for the USB_PHY block in 7211b0 */ #define USB_PHY_PLL_CTL 0x00 -#define USB_PHY_PLL_CTL_PLL_RESETB_MASK 0x40000000 +#define USB_PHY_PLL_CTL_PLL_RESETB_MASK BIT(30) #define USB_PHY_PLL_LDO_CTL 0x08 -#define USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK 0x00000004 -#define USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK 0x00000002 -#define USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK 0x00000001 +#define USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK BIT(0) +#define USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK BIT(1) +#define USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK BIT(2) #define USB_PHY_UTMI_CTL_1 0x04 -#define USB_PHY_UTMI_CTL_1_POWER_UP_FSM_EN_MASK 0x00000800 -#define USB_PHY_UTMI_CTL_1_PHY_MODE_MASK 0x0000000c +#define USB_PHY_UTMI_CTL_1_PHY_MODE_MASK GENMASK(3, 2) #define USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT 2 +#define USB_PHY_UTMI_CTL_1_POWER_UP_FSM_EN_MASK BIT(11) #define USB_PHY_IDDQ 0x1c -#define USB_PHY_IDDQ_phy_iddq_MASK 0x00000001 +#define USB_PHY_IDDQ_phy_iddq_MASK BIT(0) #define USB_PHY_STATUS 0x20 -#define USB_PHY_STATUS_pll_lock_MASK 0x00000001 +#define USB_PHY_STATUS_pll_lock_MASK BIT(0) /* Register definitions for the MDIO registers in the DWC2 block of * the 7211b0. 
@@ -86,7 +86,7 @@ /* Register definitions for the BDC EC block in 7211b0 */ #define BDC_EC_AXIRDA 0x0c -#define BDC_EC_AXIRDA_RTS_MASK 0xf0000000 +#define BDC_EC_AXIRDA_RTS_MASK GENMASK(31, 28) #define BDC_EC_AXIRDA_RTS_SHIFT 28 diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c index a7f8b3d3264d5..a1ca83308f98d 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init.c @@ -21,57 +21,57 @@ /* Register definitions for the USB CTRL block */ #define USB_CTRL_SETUP 0x00 -#define USB_CTRL_SETUP_IOC_MASK 0x00000010 -#define USB_CTRL_SETUP_IPP_MASK 0x00000020 -#define USB_CTRL_SETUP_BABO_MASK 0x00000001 -#define USB_CTRL_SETUP_FNHW_MASK 0x00000002 -#define USB_CTRL_SETUP_FNBO_MASK 0x00000004 -#define USB_CTRL_SETUP_WABO_MASK 0x00000008 -#define USB_CTRL_SETUP_SCB_CLIENT_SWAP_MASK 0x00002000 /* option */ -#define USB_CTRL_SETUP_SCB1_EN_MASK 0x00004000 /* option */ -#define USB_CTRL_SETUP_SCB2_EN_MASK 0x00008000 /* option */ -#define USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK 0X00020000 /* option */ -#define USB_CTRL_SETUP_SS_EHCI64BIT_EN_VAR_MASK 0x00010000 /* option */ -#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK 0x02000000 /* option */ -#define USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK 0x04000000 /* option */ -#define USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK 0x08000000 /* opt */ -#define USB_CTRL_SETUP_OC3_DISABLE_MASK 0xc0000000 /* option */ +#define USB_CTRL_SETUP_BABO_MASK BIT(0) +#define USB_CTRL_SETUP_FNHW_MASK BIT(1) +#define USB_CTRL_SETUP_FNBO_MASK BIT(2) +#define USB_CTRL_SETUP_WABO_MASK BIT(3) +#define USB_CTRL_SETUP_IOC_MASK BIT(4) +#define USB_CTRL_SETUP_IPP_MASK BIT(5) +#define USB_CTRL_SETUP_SCB_CLIENT_SWAP_MASK BIT(13) /* option */ +#define USB_CTRL_SETUP_SCB1_EN_MASK BIT(14) /* option */ +#define USB_CTRL_SETUP_SCB2_EN_MASK BIT(15) /* option */ +#define USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK BIT(17) /* option */ +#define USB_CTRL_SETUP_SS_EHCI64BIT_EN_VAR_MASK BIT(16) /* option */ +#define USB_CTRL_SETUP_STRAP_IPP_SEL_MASK BIT(25) /* option */ +#define USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK BIT(26) /* option */ +#define USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK BIT(27) /* opt */ +#define USB_CTRL_SETUP_OC3_DISABLE_MASK GENMASK(31, 30) /* option */ #define USB_CTRL_PLL_CTL 0x04 -#define USB_CTRL_PLL_CTL_PLL_SUSPEND_EN_MASK 0x08000000 -#define USB_CTRL_PLL_CTL_PLL_RESETB_MASK 0x40000000 -#define USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK 0x80000000 /* option */ +#define USB_CTRL_PLL_CTL_PLL_SUSPEND_EN_MASK BIT(27) +#define USB_CTRL_PLL_CTL_PLL_RESETB_MASK BIT(30) +#define USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK BIT(31) /* option */ #define USB_CTRL_EBRIDGE 0x0c -#define USB_CTRL_EBRIDGE_ESTOP_SCB_REQ_MASK 0x00020000 /* option */ -#define USB_CTRL_EBRIDGE_EBR_SCB_SIZE_MASK 0x00000f80 /* option */ +#define USB_CTRL_EBRIDGE_EBR_SCB_SIZE_MASK GENMASK(11, 7) /* option */ +#define USB_CTRL_EBRIDGE_ESTOP_SCB_REQ_MASK BIT(17) /* option */ #define USB_CTRL_OBRIDGE 0x10 -#define USB_CTRL_OBRIDGE_LS_KEEP_ALIVE_MASK 0x08000000 +#define USB_CTRL_OBRIDGE_LS_KEEP_ALIVE_MASK BIT(27) #define USB_CTRL_MDIO 0x14 #define USB_CTRL_MDIO2 0x18 #define USB_CTRL_UTMI_CTL_1 0x2c -#define USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_MASK 0x00000800 -#define USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_P1_MASK 0x08000000 +#define USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_MASK BIT(11) +#define USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_P1_MASK BIT(27) #define USB_CTRL_USB_PM 0x34 -#define USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK 0x00800000 /* option */ -#define 
USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK 0x00400000 /* option */ -#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_VAR_MASK 0x40000000 /* option */ -#define USB_CTRL_USB_PM_USB_PWRDN_MASK 0x80000000 /* option */ -#define USB_CTRL_USB_PM_SOFT_RESET_MASK 0x40000000 /* option */ -#define USB_CTRL_USB_PM_USB20_HC_RESETB_MASK 0x30000000 /* option */ -#define USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK 0x00300000 /* option */ -#define USB_CTRL_USB_PM_RMTWKUP_EN_MASK 0x00000001 +#define USB_CTRL_USB_PM_RMTWKUP_EN_MASK BIT(0) +#define USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK GENMASK(21, 20) /* option */ +#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK BIT(22) /* option */ +#define USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK BIT(23) /* option */ +#define USB_CTRL_USB_PM_USB20_HC_RESETB_MASK GENMASK(29, 28) /* option */ +#define USB_CTRL_USB_PM_XHC_SOFT_RESETB_VAR_MASK BIT(30) /* option */ +#define USB_CTRL_USB_PM_SOFT_RESET_MASK BIT(30) /* option */ +#define USB_CTRL_USB_PM_USB_PWRDN_MASK BIT(31) /* option */ #define USB_CTRL_USB_PM_STATUS 0x38 #define USB_CTRL_USB30_CTL1 0x60 -#define USB_CTRL_USB30_CTL1_PHY3_PLL_SEQ_START_MASK 0x00000010 -#define USB_CTRL_USB30_CTL1_PHY3_RESETB_MASK 0x00010000 -#define USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK 0x00020000 /* option */ -#define USB_CTRL_USB30_CTL1_USB3_IOC_MASK 0x10000000 /* option */ -#define USB_CTRL_USB30_CTL1_USB3_IPP_MASK 0x20000000 /* option */ +#define USB_CTRL_USB30_CTL1_PHY3_PLL_SEQ_START_MASK BIT(4) +#define USB_CTRL_USB30_CTL1_PHY3_RESETB_MASK BIT(16) +#define USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK BIT(17) /* option */ +#define USB_CTRL_USB30_CTL1_USB3_IOC_MASK BIT(28) /* option */ +#define USB_CTRL_USB30_CTL1_USB3_IPP_MASK BIT(29) /* option */ #define USB_CTRL_USB30_PCTL 0x70 -#define USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_MASK 0x00000002 -#define USB_CTRL_USB30_PCTL_PHY3_IDDQ_OVERRIDE_MASK 0x00008000 -#define USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_P1_MASK 0x00020000 +#define USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_MASK BIT(1) +#define USB_CTRL_USB30_PCTL_PHY3_IDDQ_OVERRIDE_MASK BIT(15) +#define USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_P1_MASK BIT(17) #define USB_CTRL_USB_DEVICE_CTL1 0x90 -#define USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK 0x00000003 /* option */ +#define USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK GENMASK(1, 0) /* option */ /* Register definitions for the XHCI EC block */ #define USB_XHCI_EC_IRAADR 0x658 -- GitLab From 833c173ebab420997b98a0d888fc5b55ee4a8a7e Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:16 -0700 Subject: [PATCH 137/694] phy: usb: Disable phy auto-suspend The BDC block requires the PLL lock in order to grab the PLL clock. The phy auto-suspend feature turns off the phy when nothing is attached leading to the PLL to not lock. This leads the BDC block to grab the AUX clock instead of the PLL clock. This is not ideal, so lets turn this feature off. 
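The change amounts to a standard read-modify-write of the PLL control register, in the BIT()-mask style these patches adopt. A minimal sketch of that shape, reusing the register helpers and the new PLL_SUSPEND mask from the diff that follows (the standalone wrapper function is invented for illustration; the real change lives inline in usb_init_common_7211b0()):

    /*
     * Illustrative sketch only: set the "disable auto-suspend" control bit
     * with a read-modify-write, using the driver's register accessors.
     */
    static void usb_phy_disable_autosuspend(void __iomem *usb_phy)
    {
            u32 reg;

            reg = brcm_usb_readl(usb_phy + USB_PHY_PLL_CTL);
            reg |= USB_PHY_PLL_CTL_PLL_SUSPEND_MASK;        /* BIT(27) */
            brcm_usb_writel(reg, usb_phy + USB_PHY_PLL_CTL);
    }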
Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-5-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index 26e9585eca60c..6a4d47886e0e7 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -62,6 +62,7 @@ /* Register definitions for the USB_PHY block in 7211b0 */ #define USB_PHY_PLL_CTL 0x00 +#define USB_PHY_PLL_CTL_PLL_SUSPEND_MASK BIT(27) #define USB_PHY_PLL_CTL_PLL_RESETB_MASK BIT(30) #define USB_PHY_PLL_LDO_CTL 0x08 #define USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK BIT(0) @@ -259,6 +260,11 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params) brcm_usb_writel(reg, usb_phy + USB_PHY_UTMI_CTL_1); } + /* Disable PLL auto suspend */ + reg = brcm_usb_readl(usb_phy + USB_PHY_PLL_CTL); + reg |= USB_PHY_PLL_CTL_PLL_SUSPEND_MASK; + brcm_usb_writel(reg, usb_phy + USB_PHY_PLL_CTL); + /* Init the PHY */ reg = USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK | USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK | -- GitLab From 700c44b508020a3ea29d297c677f8d4ab14b7e6a Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:17 -0700 Subject: [PATCH 138/694] phy: usb: Use slow clock for wake enabled suspend The logic was incorrect when switching to slow clock. We want the slow clock if wake_enabled is set. Fixes: ae532b2b7aa5 ("phy: usb: Add "wake on" functionality for newer Synopsis XHCI controllers") Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-6-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index 6a4d47886e0e7..f78cff24eec8d 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -337,13 +337,12 @@ static void usb_uninit_common_7216(struct brcm_usb_init_params *params) pr_debug("%s\n", __func__); - if (!params->wake_enabled) { - USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN); - + if (params->wake_enabled) { /* Switch to using slower clock during suspend to save power */ USB_CTRL_SET(ctrl, USB_PM, XHC_S2_CLK_SWITCH_EN); - } else { usb_wake_enable_7216(params, true); + } else { + USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN); } } -- GitLab From 8484199c09347bdd5d81ee8a2bc530850f900797 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Wed, 5 Oct 2022 14:30:18 -0700 Subject: [PATCH 139/694] phy: usb: Fix clock imbalance for suspend/resume We should be disabling clocks when wake from USB is not needed. Since this wasn't done, we had a clock imbalance since clocks were always being enabled on resume. 
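The fix makes the suspend and resume paths symmetric: the same wake_enabled test now gates both the disable and the enable. A minimal sketch of the balanced pattern, using hypothetical structure and field names rather than the driver's own:

    #include <linux/clk.h>
    #include <linux/device.h>

    struct example_phy_priv {               /* hypothetical, for illustration */
            struct clk *clk;
            bool wake_enabled;
    };

    static int example_phy_suspend(struct device *dev)
    {
            struct example_phy_priv *priv = dev_get_drvdata(dev);

            /* Keep the clock running only if it is needed for wake-up. */
            if (!priv->wake_enabled)
                    clk_disable_unprepare(priv->clk);
            return 0;
    }

    static int example_phy_resume(struct device *dev)
    {
            struct example_phy_priv *priv = dev_get_drvdata(dev);

            /* Re-enable only what suspend disabled, so the counts stay balanced. */
            if (!priv->wake_enabled)
                    return clk_prepare_enable(priv->clk);
            return 0;
    }

Whatever condition guards clk_disable_unprepare() in suspend must guard clk_prepare_enable() in resume; otherwise the prepare/enable counts drift upward on every cycle, which is exactly the imbalance this patch removes.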
Fixes: ae532b2b7aa5 ("phy: usb: Add "wake on" functionality for newer Synopsis XHCI controllers") Fixes: b0c0b66c0b43 ("phy: usb: Add support for wake and USB low power mode for 7211 S2/S5") Signed-off-by: Justin Chen Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/1665005418-15807-7-git-send-email-justinpopo6@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 2 -- drivers/phy/broadcom/phy-brcm-usb-init.h | 1 - drivers/phy/broadcom/phy-brcm-usb.c | 8 +++++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c index f78cff24eec8d..76cf4280d7ed9 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c @@ -430,7 +430,6 @@ void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params) params->family_name = "7216"; params->ops = &bcm7216_ops; - params->suspend_with_clocks = true; } void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params) @@ -440,5 +439,4 @@ void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params) params->family_name = "7211"; params->ops = &bcm7211b0_ops; - params->suspend_with_clocks = true; } diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h index bedf2b8e2f196..f9fbf8fb80e54 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.h +++ b/drivers/phy/broadcom/phy-brcm-usb-init.h @@ -62,7 +62,6 @@ struct brcm_usb_init_params { const struct brcm_usb_init_ops *ops; struct regmap *syscon_piarbctl; bool wake_enabled; - bool suspend_with_clocks; }; void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params); diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index d97fa58cd63a6..aafba4a047018 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -602,7 +602,7 @@ static int brcm_usb_phy_suspend(struct device *dev) * and newer XHCI->2.0-clks/3.0-clks. */ - if (!priv->ini.suspend_with_clocks) { + if (!priv->ini.wake_enabled) { if (priv->phys[BRCM_USB_PHY_3_0].inited) clk_disable_unprepare(priv->usb_30_clk); if (priv->phys[BRCM_USB_PHY_2_0].inited || @@ -619,8 +619,10 @@ static int brcm_usb_phy_resume(struct device *dev) { struct brcm_usb_phy_data *priv = dev_get_drvdata(dev); - clk_prepare_enable(priv->usb_20_clk); - clk_prepare_enable(priv->usb_30_clk); + if (!priv->ini.wake_enabled) { + clk_prepare_enable(priv->usb_20_clk); + clk_prepare_enable(priv->usb_30_clk); + } brcm_usb_init_ipp(&priv->ini); /* -- GitLab From 6964affe65066651eca21e97247d3b7cac5153dc Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 31 Oct 2022 11:13:53 +0000 Subject: [PATCH 140/694] dt-bindings: phy: Add special clock for Allwinner H616 PHY The USB PHY IP in the Allwinner H616 SoC requires a quirk that involves some resources from port 2's PHY and HCI IP. In particular the PMU clock for port 2 must be surely ungated before accessing the REG_HCI_PHY_CTL register of port 2. To allow each USB port to be controlled independently of port 2, we need a handle to that particular PMU clock in the *PHY* node, as the HCI and PHY part might be handled by separate drivers. Add that clock to the requirements of the H616 PHY binding, so that a PHY driver can apply the quirk in isolation, without requiring help from port 2's HCI driver. 
Signed-off-by: Andre Przywara Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221031111358.3387297-3-andre.przywara@arm.com Signed-off-by: Vinod Koul --- .../phy/allwinner,sun8i-h3-usb-phy.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml index 77539b4601c24..2df012d13655e 100644 --- a/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml @@ -36,18 +36,22 @@ properties: - const: pmu3 clocks: + minItems: 4 items: - description: USB OTG PHY bus clock - description: USB Host 0 PHY bus clock - description: USB Host 1 PHY bus clock - description: USB Host 2 PHY bus clock + - description: PMU clock for host port 2 clock-names: + minItems: 4 items: - const: usb0_phy - const: usb1_phy - const: usb2_phy - const: usb3_phy + - const: pmu2_clk resets: items: @@ -96,6 +100,28 @@ required: - resets - reset-names +allOf: + - if: + properties: + compatible: + contains: + enum: + - allwinner,sun50i-h616-usb-phy + then: + properties: + clocks: + minItems: 5 + + clock-names: + minItems: 5 + else: + properties: + clocks: + maxItems: 4 + + clock-names: + maxItems: 4 + additionalProperties: false examples: -- GitLab From b45c6d80325bec2b78c716629a518b6442d8bdc6 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 31 Oct 2022 11:13:54 +0000 Subject: [PATCH 141/694] phy: sun4i-usb: Introduce port2 SIDDQ quirk At least the Allwinner H616 SoC requires a weird quirk to make most USB PHYs work: Only port2 works out of the box, but all other ports need some help from this port2 to work correctly: The CLK_BUS_PHY2 and RST_USB_PHY2 clock and reset need to be enabled, and the SIDDQ bit in the PMU PHY control register needs to be cleared. For this register to be accessible, CLK_BUS_ECHI2 needs to be ungated. Don't ask .... Instead of disguising this as some generic feature, treat it more like a quirk (what it really is): If the quirk bit is set, and we initialise a PHY other than PHY2, ungate this one special clock, and clear the SIDDQ bit. We also pick the clock and reset from PHY2 and enable them as well. Signed-off-by: Andre Przywara Link: https://lore.kernel.org/r/20221031111358.3387297-4-andre.przywara@arm.com Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun4i-usb.c | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index 3a3831f6059a3..e39f5ad62cc19 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -120,6 +120,7 @@ struct sun4i_usb_phy_cfg { u8 phyctl_offset; bool dedicated_clocks; bool phy0_dual_route; + bool needs_phy2_siddq; int missing_phys; }; @@ -289,6 +290,50 @@ static int sun4i_usb_phy_init(struct phy *_phy) return ret; } + /* Some PHYs on some SoCs need the help of PHY2 to work. 
*/ + if (data->cfg->needs_phy2_siddq && phy->index != 2) { + struct sun4i_usb_phy *phy2 = &data->phys[2]; + + ret = clk_prepare_enable(phy2->clk); + if (ret) { + reset_control_assert(phy->reset); + clk_disable_unprepare(phy->clk2); + clk_disable_unprepare(phy->clk); + return ret; + } + + ret = reset_control_deassert(phy2->reset); + if (ret) { + clk_disable_unprepare(phy2->clk); + reset_control_assert(phy->reset); + clk_disable_unprepare(phy->clk2); + clk_disable_unprepare(phy->clk); + return ret; + } + + /* + * This extra clock is just needed to access the + * REG_HCI_PHY_CTL PMU register for PHY2. + */ + ret = clk_prepare_enable(phy2->clk2); + if (ret) { + reset_control_assert(phy2->reset); + clk_disable_unprepare(phy2->clk); + reset_control_assert(phy->reset); + clk_disable_unprepare(phy->clk2); + clk_disable_unprepare(phy->clk); + return ret; + } + + if (phy2->pmu && data->cfg->hci_phy_ctl_clear) { + val = readl(phy2->pmu + REG_HCI_PHY_CTL); + val &= ~data->cfg->hci_phy_ctl_clear; + writel(val, phy2->pmu + REG_HCI_PHY_CTL); + } + + clk_disable_unprepare(phy->clk2); + } + if (phy->pmu && data->cfg->hci_phy_ctl_clear) { val = readl(phy->pmu + REG_HCI_PHY_CTL); val &= ~data->cfg->hci_phy_ctl_clear; @@ -354,6 +399,13 @@ static int sun4i_usb_phy_exit(struct phy *_phy) data->phy0_init = false; } + if (data->cfg->needs_phy2_siddq && phy->index != 2) { + struct sun4i_usb_phy *phy2 = &data->phys[2]; + + clk_disable_unprepare(phy2->clk); + reset_control_assert(phy2->reset); + } + sun4i_usb_phy_passby(phy, 0); reset_control_assert(phy->reset); clk_disable_unprepare(phy->clk2); @@ -785,6 +837,13 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) dev_err(dev, "failed to get clock %s\n", name); return PTR_ERR(phy->clk2); } + } else { + snprintf(name, sizeof(name), "pmu%d_clk", i); + phy->clk2 = devm_clk_get_optional(dev, name); + if (IS_ERR(phy->clk2)) { + dev_err(dev, "failed to get clock %s\n", name); + return PTR_ERR(phy->clk2); + } } snprintf(name, sizeof(name), "usb%d_reset", i); -- GitLab From 0f607406525d25019dd9c498bcc0b42734fc59d5 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 31 Oct 2022 11:13:55 +0000 Subject: [PATCH 142/694] phy: sun4i-usb: Add support for the H616 USB PHY The USB PHY used in the Allwinner H616 SoC inherits some traits from its various predecessors: it has four full PHYs like the H3, needs some extra bits to be set like the H6, and puts SIDDQ on a different bit like the A100. Plus it needs this weird PHY2 quirk. Name all those properties in a new config struct and assign a new compatible name to it. 
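The new compatible string is bound to the config struct through the driver's of_device_id table shown in the diff below. For readers unfamiliar with the pattern, this is roughly how such a per-compatible configuration is picked up at probe time; a generic sketch, not necessarily the exact lookup code used by phy-sun4i-usb:

    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    static int example_phy_probe(struct platform_device *pdev)
    {
            const struct sun4i_usb_phy_cfg *cfg;

            /* Returns the .data pointer of the matching of_device_id entry. */
            cfg = of_device_get_match_data(&pdev->dev);
            if (!cfg)
                    return -EINVAL;

            /* cfg->num_phys, cfg->needs_phy2_siddq, ... then drive the quirks. */
            return 0;
    }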
Signed-off-by: Andre Przywara Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20221031111358.3387297-5-andre.przywara@arm.com Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun4i-usb.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index e39f5ad62cc19..5472db9e87ef8 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -1032,6 +1032,17 @@ static const struct sun4i_usb_phy_cfg sun50i_h6_cfg = { .missing_phys = BIT(1) | BIT(2), }; +static const struct sun4i_usb_phy_cfg sun50i_h616_cfg = { + .num_phys = 4, + .type = sun50i_h6_phy, + .disc_thresh = 3, + .phyctl_offset = REG_PHYCTL_A33, + .dedicated_clocks = true, + .phy0_dual_route = true, + .hci_phy_ctl_clear = PHY_CTL_SIDDQ, + .needs_phy2_siddq = true, +}; + static const struct of_device_id sun4i_usb_phy_of_match[] = { { .compatible = "allwinner,sun4i-a10-usb-phy", .data = &sun4i_a10_cfg }, { .compatible = "allwinner,sun5i-a13-usb-phy", .data = &sun5i_a13_cfg }, @@ -1047,6 +1058,7 @@ static const struct of_device_id sun4i_usb_phy_of_match[] = { { .compatible = "allwinner,sun50i-a64-usb-phy", .data = &sun50i_a64_cfg}, { .compatible = "allwinner,sun50i-h6-usb-phy", .data = &sun50i_h6_cfg }, + { .compatible = "allwinner,sun50i-h616-usb-phy", .data = &sun50i_h616_cfg }, { }, }; MODULE_DEVICE_TABLE(of, sun4i_usb_phy_of_match); -- GitLab From 8ca2a81bff096b359736bba9b7d06cf5bc04fa88 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 26 Oct 2022 13:15:30 +0530 Subject: [PATCH 143/694] dt-bindings: phy: ti: phy-gmii-sel: Add bindings for J721e TI's J721e SoC supports additional PHY modes like QSGMII and SGMII that are not supported on earlier SoCs. Add a compatible for it. Extend ti,qsgmii-main-ports property to support selection of upto two main ports at once across the two QSGMII interfaces. Signed-off-by: Siddharth Vadapalli Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20221026074532.109220-2-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- .../bindings/phy/ti,phy-gmii-sel.yaml | 48 ++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml index da7cac537e15c..3a6d686383cf9 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml @@ -54,6 +54,7 @@ properties: - ti,dm814-phy-gmii-sel - ti,am654-phy-gmii-sel - ti,j7200-cpsw5g-phy-gmii-sel + - ti,j721e-cpsw9g-phy-gmii-sel reg: maxItems: 1 @@ -63,14 +64,17 @@ properties: ti,qsgmii-main-ports: $ref: /schemas/types.yaml#/definitions/uint32-array description: | - Required only for QSGMII mode. Array to select the port for - QSGMII main mode. Rest of the ports are selected as QSGMII_SUB - ports automatically. Any one of the 4 CPSW5G ports can act as the - main port with the rest of them being the QSGMII_SUB ports. - maxItems: 1 + Required only for QSGMII mode. Array to select the port/s for QSGMII + main mode. The size of the array corresponds to the number of QSGMII + interfaces and thus, the number of distinct QSGMII main ports, + supported by the device. If the device supports two QSGMII interfaces + but only one QSGMII interface is desired, repeat the QSGMII main port + value corresponding to the QSGMII interface in the array. 
+ minItems: 1 + maxItems: 2 items: minimum: 1 - maximum: 4 + maximum: 8 allOf: - if: @@ -81,12 +85,43 @@ allOf: - ti,dra7xx-phy-gmii-sel - ti,dm814-phy-gmii-sel - ti,am654-phy-gmii-sel + - ti,j7200-cpsw5g-phy-gmii-sel + - ti,j721e-cpsw9g-phy-gmii-sel then: properties: '#phy-cells': const: 1 description: CPSW port number (starting from 1) + - if: + properties: + compatible: + contains: + enum: + - ti,j7200-cpsw5g-phy-gmii-sel + then: + properties: + ti,qsgmii-main-ports: + maxItems: 1 + items: + minimum: 1 + maximum: 4 + + - if: + properties: + compatible: + contains: + enum: + - ti,j721e-cpsw9g-phy-gmii-sel + then: + properties: + ti,qsgmii-main-ports: + minItems: 2 + maxItems: 2 + items: + minimum: 1 + maximum: 8 + - if: not: properties: @@ -94,6 +129,7 @@ allOf: contains: enum: - ti,j7200-cpsw5g-phy-gmii-sel + - ti,j721e-cpsw9g-phy-gmii-sel then: properties: ti,qsgmii-main-ports: false -- GitLab From 3b66ab69c566e79d58cc38bd7c90a6b2b0b84a7d Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 26 Oct 2022 13:15:31 +0530 Subject: [PATCH 144/694] phy: ti: gmii-sel: Update methods for fetching and using qsgmii main port The number of QSGMII main ports are specific to the device. TI's J7200 for which the QSGMII main port property is fetched from the device-tree has only one QSGMII main port. However, devices like TI's J721e support up to two QSGMII main ports. Thus, the existing methods for fetching and using the QSGMII main port are not scalable. Update the existing methods for handling the QSGMII main ports and its associated requirements to make it scalable for future devices. Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20221026074532.109220-3-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-gmii-sel.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index 0bcfd6d96b4d0..c8f30d2e1f460 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -50,6 +50,7 @@ struct phy_gmii_sel_soc_data { const struct reg_field (*regfields)[PHY_GMII_SEL_LAST]; bool use_of_data; u64 extra_modes; + u32 num_qsgmii_main_ports; }; struct phy_gmii_sel_priv { @@ -213,6 +214,8 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw5g_soc_j7200 = { .use_of_data = true, .regfields = phy_gmii_sel_fields_am654, .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII), + .num_ports = 4, + .num_qsgmii_main_ports = 1, }; static const struct of_device_id phy_gmii_sel_id_table[] = { @@ -378,11 +381,13 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv) static int phy_gmii_sel_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + const struct phy_gmii_sel_soc_data *soc_data; struct device_node *node = dev->of_node; const struct of_device_id *of_id; struct phy_gmii_sel_priv *priv; u32 main_ports = 1; int ret; + u32 i; of_id = of_match_node(phy_gmii_sel_id_table, pdev->dev.of_node); if (!of_id) @@ -394,16 +399,26 @@ static int phy_gmii_sel_probe(struct platform_device *pdev) priv->dev = &pdev->dev; priv->soc_data = of_id->data; + soc_data = priv->soc_data; priv->num_ports = priv->soc_data->num_ports; - of_property_read_u32(node, "ti,qsgmii-main-ports", &main_ports); + priv->qsgmii_main_ports = 0; + /* - * Ensure that main_ports is within bounds. If the property - * ti,qsgmii-main-ports is not mentioned, or the value mentioned - * is out of bounds, default to 1. 
+ * Based on the compatible, try to read the appropriate number of + * QSGMII main ports from the "ti,qsgmii-main-ports" property from + * the device-tree node. */ - if (main_ports < 1 || main_ports > 4) - main_ports = 1; - priv->qsgmii_main_ports = PHY_GMII_PORT(main_ports); + for (i = 0; i < soc_data->num_qsgmii_main_ports; i++) { + of_property_read_u32_index(node, "ti,qsgmii-main-ports", i, &main_ports); + /* + * Ensure that main_ports is within bounds. + */ + if (main_ports < 1 || main_ports > soc_data->num_ports) { + dev_err(dev, "Invalid qsgmii main port provided\n"); + return -EINVAL; + } + priv->qsgmii_main_ports |= PHY_GMII_PORT(main_ports); + } priv->regmap = syscon_node_to_regmap(node->parent); if (IS_ERR(priv->regmap)) { -- GitLab From 5bd78c00d753d4e80e151555565334c475a559d3 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 26 Oct 2022 13:15:32 +0530 Subject: [PATCH 145/694] phy: ti: gmii-sel: Add support for CPSW9G GMII SEL in J721e Each of the CPSW9G ports in J721e support additional modes like QSGMII. Add a new compatible for J721e to support the additional modes. In TI's J721e, each of the CPSW9G ethernet interfaces can act as a QSGMII main or QSGMII-SUB port. The QSGMII main interface is responsible for performing auto-negotiation between the MAC and the PHY while the rest of the interfaces are designated as QSGMII-SUB interfaces, indicating that they will not be taking part in the auto-negotiation process. Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20221026074532.109220-4-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-gmii-sel.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index c8f30d2e1f460..8c667819c39a1 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -218,6 +218,15 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw5g_soc_j7200 = { .num_qsgmii_main_ports = 1, }; +static const +struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw9g_soc_j721e = { + .use_of_data = true, + .regfields = phy_gmii_sel_fields_am654, + .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII), + .num_ports = 8, + .num_qsgmii_main_ports = 2, +}; + static const struct of_device_id phy_gmii_sel_id_table[] = { { .compatible = "ti,am3352-phy-gmii-sel", @@ -243,6 +252,10 @@ static const struct of_device_id phy_gmii_sel_id_table[] = { .compatible = "ti,j7200-cpsw5g-phy-gmii-sel", .data = &phy_gmii_sel_cpsw5g_soc_j7200, }, + { + .compatible = "ti,j721e-cpsw9g-phy-gmii-sel", + .data = &phy_gmii_sel_cpsw9g_soc_j721e, + }, {} }; MODULE_DEVICE_TABLE(of, phy_gmii_sel_id_table); -- GitLab From 53bffe0055741440a6c91abb80bad1c62ea443e3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 Oct 2022 15:44:49 -0700 Subject: [PATCH 146/694] phy: phy-brcm-usb: Utilize platform_get_irq_byname_optional() The wake-up interrupt lines are entirely optional, avoid printing messages that interrupts were not found by switching to the _optional variant. 
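For context, the general shape of an optional IRQ lookup is sketched below with a hypothetical helper; platform_get_irq_byname_optional() returns the same error codes as the non-optional variant but stays silent when the interrupt is simply not described by the firmware.

    #include <linux/interrupt.h>
    #include <linux/platform_device.h>
    #include <linux/pm_wakeup.h>

    /* Sketch: request a wake IRQ only if the firmware actually provides one. */
    static int example_request_wake_irq(struct platform_device *pdev,
                                        irq_handler_t isr)
    {
            struct device *dev = &pdev->dev;
            int irq, err;

            irq = platform_get_irq_byname_optional(pdev, "wake");
            if (irq == -ENXIO)
                    return 0;               /* no wake IRQ described: skip it */
            if (irq < 0)
                    return irq;             /* probe deferral or real error */

            err = devm_request_irq(dev, irq, isr, 0, dev_name(dev), dev);
            if (err)
                    return err;

            device_set_wakeup_capable(dev, true);
            return 0;
    }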
Signed-off-by: Florian Fainelli Acked-by: Justin Chen Link: https://lore.kernel.org/r/20221026224450.2958762-1-f.fainelli@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/broadcom/phy-brcm-usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index aafba4a047018..4de39999f43d3 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -446,9 +446,9 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev, priv->suspend_clk = NULL; } - priv->wake_irq = platform_get_irq_byname(pdev, "wake"); + priv->wake_irq = platform_get_irq_byname_optional(pdev, "wake"); if (priv->wake_irq < 0) - priv->wake_irq = platform_get_irq_byname(pdev, "wakeup"); + priv->wake_irq = platform_get_irq_byname_optional(pdev, "wakeup"); if (priv->wake_irq >= 0) { err = devm_request_irq(dev, priv->wake_irq, brcm_usb_phy_wake_isr, 0, -- GitLab From 2428787f16155aa03aa63d5c130e83809a7df5cf Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Wed, 5 Oct 2022 16:40:31 +0800 Subject: [PATCH 147/694] phy: tegra: xusb: Remove usb3 supply Remove redundant codes for getting the vbus supply of usb3 ports because we get and control the vbus supply by the companion usb2 ports Signed-off-by: Wayne Chang Signed-off-by: Haotien Hsu Reviewed-by: Jon Hunter Link: https://lore.kernel.org/r/20221005084031.2154251-1-haotienh@nvidia.com Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb-tegra124.c | 1 - drivers/phy/tegra/xusb-tegra186.c | 1 - drivers/phy/tegra/xusb-tegra210.c | 1 - drivers/phy/tegra/xusb.c | 10 +--------- drivers/phy/tegra/xusb.h | 2 -- 5 files changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/phy/tegra/xusb-tegra124.c b/drivers/phy/tegra/xusb-tegra124.c index db56c7fbe60be..f4f75ea033b81 100644 --- a/drivers/phy/tegra/xusb-tegra124.c +++ b/drivers/phy/tegra/xusb-tegra124.c @@ -1652,7 +1652,6 @@ tegra124_usb3_port_map(struct tegra_xusb_port *port) static const struct tegra_xusb_port_ops tegra124_usb3_port_ops = { .release = tegra_xusb_usb3_port_release, - .remove = tegra_xusb_usb3_port_remove, .enable = tegra124_usb3_port_enable, .disable = tegra124_usb3_port_disable, .map = tegra124_usb3_port_map, diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index 0996ede63387a..6a8bd87cfdbda 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -1185,7 +1185,6 @@ tegra186_usb3_port_map(struct tegra_xusb_port *port) static const struct tegra_xusb_port_ops tegra186_usb3_port_ops = { .release = tegra_xusb_usb3_port_release, - .remove = tegra_xusb_usb3_port_remove, .enable = tegra186_usb3_port_enable, .disable = tegra186_usb3_port_disable, .map = tegra186_usb3_port_map, diff --git a/drivers/phy/tegra/xusb-tegra210.c b/drivers/phy/tegra/xusb-tegra210.c index eedfc7c2cc052..ebc8a7e21a318 100644 --- a/drivers/phy/tegra/xusb-tegra210.c +++ b/drivers/phy/tegra/xusb-tegra210.c @@ -3078,7 +3078,6 @@ tegra210_usb3_port_map(struct tegra_xusb_port *port) static const struct tegra_xusb_port_ops tegra210_usb3_port_ops = { .release = tegra_xusb_usb3_port_release, - .remove = tegra_xusb_usb3_port_remove, .enable = tegra210_usb3_port_enable, .disable = tegra210_usb3_port_disable, .map = tegra210_usb3_port_map, diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index dce45fbbd699c..ff4b930879f3c 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -954,8 +954,7 @@ static int 
tegra_xusb_usb3_port_parse_dt(struct tegra_xusb_usb3_port *usb3) return -EINVAL; } - usb3->supply = regulator_get(&port->dev, "vbus"); - return PTR_ERR_OR_ZERO(usb3->supply); + return 0; } static int tegra_xusb_add_usb3_port(struct tegra_xusb_padctl *padctl, @@ -1012,13 +1011,6 @@ void tegra_xusb_usb3_port_release(struct tegra_xusb_port *port) kfree(usb3); } -void tegra_xusb_usb3_port_remove(struct tegra_xusb_port *port) -{ - struct tegra_xusb_usb3_port *usb3 = to_usb3_port(port); - - regulator_put(usb3->supply); -} - static void __tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl) { struct tegra_xusb_port *port, *tmp; diff --git a/drivers/phy/tegra/xusb.h b/drivers/phy/tegra/xusb.h index 8cfbbdbd6e0c0..c384734a61c2a 100644 --- a/drivers/phy/tegra/xusb.h +++ b/drivers/phy/tegra/xusb.h @@ -359,7 +359,6 @@ void tegra_xusb_hsic_port_release(struct tegra_xusb_port *port); struct tegra_xusb_usb3_port { struct tegra_xusb_port base; - struct regulator *supply; bool context_saved; unsigned int port; bool internal; @@ -381,7 +380,6 @@ struct tegra_xusb_usb3_port * tegra_xusb_find_usb3_port(struct tegra_xusb_padctl *padctl, unsigned int index); void tegra_xusb_usb3_port_release(struct tegra_xusb_port *port); -void tegra_xusb_usb3_port_remove(struct tegra_xusb_port *port); struct tegra_xusb_port_ops { void (*release)(struct tegra_xusb_port *port); -- GitLab From 846d479224537185768276dd4a84c1bda2bbcd4e Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 18 Oct 2022 13:58:41 -0400 Subject: [PATCH 148/694] doc: phy: Document typical order of API calls Document the typical order of API calls to used by new drivers and controllers. Many existing controllers follow this order, but some do not. This is especially true for controllers designed to work with one particular PHY driver, which may not need a call to (for example) phy_init. Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20221018175841.1906611-1-sean.anderson@seco.com Signed-off-by: Vinod Koul --- Documentation/driver-api/phy/phy.rst | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst index 8fc1ce0bb905d..8e8b3e8f95238 100644 --- a/Documentation/driver-api/phy/phy.rst +++ b/Documentation/driver-api/phy/phy.rst @@ -94,7 +94,8 @@ Inorder to dereference the private data (in phy_ops), the phy provider driver can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in phy_ops to get back the private data. -4. Getting a reference to the PHY +Getting a reference to the PHY +============================== Before the controller can make use of the PHY, it has to get a reference to it. This framework provides the following APIs to get a reference to the PHY. @@ -130,6 +131,28 @@ the phy_init() and phy_exit() calls, and phy_power_on() and phy_power_off() calls are all NOP when applied to a NULL phy. The NULL phy is useful in devices for handling optional phy devices. +Order of API calls +================== + +The general order of calls should be:: + + [devm_][of_]phy_get() + phy_init() + phy_power_on() + [phy_set_mode[_ext]()] + ... + phy_power_off() + phy_exit() + [[of_]phy_put()] + +Some PHY drivers may not implement :c:func:`phy_init` or :c:func:`phy_power_on`, +but controllers should always call these functions to be compatible with other +PHYs. Some PHYs may require :c:func:`phy_set_mode `, while +others may use a default mode (typically configured via devicetree or other +firmware). 
For compatibility, you should always call this function if you know what mode you will be using. Generally, this function should be called after :c:func:`phy_power_on`, although some PHY drivers may allow it at any time. + Releasing a reference to the PHY ================================ -- GitLab From ad9f64cd2d4a17f0d5ecf04d38170fdc34f21c61 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 3 Oct 2022 09:58:48 -0700 Subject: [PATCH 149/694] LICENSES: Add the copyleft-next-0.3.1 license Add the full text of the copyleft-next-0.3.1 license to the kernel tree as well as the required tags for reference and tooling. The license text was copied directly from the copyleft-next project's git tree [0]. Discussion of using copyleft-next-0.3.1 on Linux started in June 2016 [1]. In the end Linus' preference was to have drivers use MODULE_LICENSE("GPL") to make it clear that the GPL applies when it comes to Linux [2]. Additionally, even though copyleft-next-0.3.1 has been found to be GPLv2 compatible by three attorneys at SUSE and Redhat [3], to err on the side of caution we simply recommend to always use the "OR" language for this license [4]. Even though it has been a goal of the project to be GPL-v2 compatible, to be certain, in 2016 I asked for a clarification about what makes copyleft-next GPLv2 compatible and also asked for a summary of benefits. This prompted some minor changes to make compatibility even further clear and as of copyleft 0.3.1 compatibility should be crystal clear [5]. The summary of why copyleft-next 0.3.1 is compatible with GPLv2 is explained as follows: Like GPLv2, copyleft-next requires distribution of derivative works ("Derived Works" in copyleft-next 0.3.x) to be under the same license. Ordinarily this would make the two licenses incompatible. However, copyleft-next 0.3.1 says: "If the Derived Work includes material licensed under the GPL, You may instead license the Derived Work under the GPL." "GPL" is defined to include GPLv2. In practice this means copyleft-next code in Linux may be licensed under the GPL2, however there are additional obvious gains for bringing contributions from Linux outbound where copyleft-next is preferred. A summary of benefits why projects outside of Linux might prefer to use copyleft-next >= 0.3.1 over GPLv2: o It is much shorter and simpler o It has an explicit patent license grant, unlike GPLv2 o Its notice preservation conditions are clearer o More free software/open source licenses are compatible with it (via section 4) o The source code requirement triggered by binary distribution is much simpler in a procedural sense o Recipients potentially have a contract claim against distributors who are noncompliant with the source code requirement o There is a built-in inbound=outbound policy for upstream contributions (cf. Apache License 2.0 section 5) o There are disincentives to engage in the controversial practice of copyleft/ proprietary dual-licensing o In 15 years copyleft expires, which can be advantageous for legacy code o There are explicit disincentives to bringing patent infringement claims accusing the licensed work of infringement (see 10b) o There is a cure period for licensees who are not compliant with the license (there is no cure opportunity in GPLv2) o copyleft-next has a 'built-in or-later' provision The first driver submission to Linux under this dual strategy was lib/test_sysctl.c through commit 9308f2f9e7f05 ("test_sysctl: add dedicated proc sysctl test driver") merged in July 2017. 
Shortly after that I also added test_kmod through commit d9c6a72d6fa29 ("kmod: add test driver to stress test the module loader") in the same month. These two drivers went in just a few months before the SPDX license practice kicked in. In 2018 Kuno Woudt went through the process to get SPDX identifiers for copyleft-next [6] [7]. Although there are SPDX tags for copyleft-next-0.3.0, we only document use in Linux starting from copyleft-next-0.3.1 which makes GPLv2 compatibility crystal clear. This patch will let us update the two Linux selftest drivers in subsequent patches with their respective SPDX license identifiers and let us remove repetitive license boiler plate. [0] https://github.com/copyleft-next/copyleft-next/blob/master/Releases/copyleft-next-0.3.1 [1] https://lore.kernel.org/lkml/1465929311-13509-1-git-send-email-mcgrof@kernel.org/ [2] https://lore.kernel.org/lkml/CA+55aFyhxcvD+q7tp+-yrSFDKfR0mOHgyEAe=f_94aKLsOu0Og@mail.gmail.com/ [3] https://lore.kernel.org/lkml/20170516232702.GL17314@wotan.suse.de/ [4] https://lkml.kernel.org/r/1495234558.7848.122.camel@linux.intel.com [5] https://lists.fedorahosted.org/archives/list/copyleft-next@lists.fedorahosted.org/thread/JTGV56DDADWGKU7ZKTZA4DLXTGTLNJ57/#SQMDIKBRAVDOCT4UVNOOCRGBN2UJIKHZ [6] https://spdx.org/licenses/copyleft-next-0.3.0.html [7] https://spdx.org/licenses/copyleft-next-0.3.1.html Cc: Goldwyn Rodrigues Cc: Kuno Woudt Cc: Richard Fontana Cc: copyleft-next@lists.fedorahosted.org Cc: Ciaran Farrell Cc: Christopher De Nicolo Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Thomas Gleixner Cc: Jonathan Corbet Cc: Thorsten Leemhuis Cc: Andrew Morton Reviewed-by: Kees Cook Signed-off-by: Luis Chamberlain Reviewed-by: Tim Bird Signed-off-by: Greg Kroah-Hartman --- LICENSES/dual/copyleft-next-0.3.1 | 236 ++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 LICENSES/dual/copyleft-next-0.3.1 diff --git a/LICENSES/dual/copyleft-next-0.3.1 b/LICENSES/dual/copyleft-next-0.3.1 new file mode 100644 index 0000000000000..c81acf7106572 --- /dev/null +++ b/LICENSES/dual/copyleft-next-0.3.1 @@ -0,0 +1,236 @@ +Valid-License-Identifier: copyleft-next-0.3.1 +SPDX-URL: https://spdx.org/licenses/copyleft-next-0.3.1 +Usage-Guide: + copyleft-next-0.3.1 is explicitly compatible with GPLv2 (or later) and + can therefore be used for kernel code. Though the best and recommended + practice is to express this in the SPDX license identifier by + licensing the code under both licenses expressed by the OR operator. + To use the copyleft-next-0.3.1 license put the following SPDX tag/value + pair into a comment according to the placement guidelines in the + licensing rules documentation: + SPDX-License-Identifier: GPL-2.0-only OR copyleft-next 0.3.1 + SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +License-Text: + +======================================================================= + + copyleft-next 0.3.1 ("this License") + Release date: 2016-04-29 + +1. License Grants; No Trademark License + + Subject to the terms of this License, I grant You: + + a) A non-exclusive, worldwide, perpetual, royalty-free, irrevocable + copyright license, to reproduce, Distribute, prepare derivative works + of, publicly perform and publicly display My Work. + + b) A non-exclusive, worldwide, perpetual, royalty-free, irrevocable + patent license under Licensed Patents to make, have made, use, sell, + offer for sale, and import Covered Works. 
+ + This License does not grant any rights in My name, trademarks, service + marks, or logos. + +2. Distribution: General Conditions + + You may Distribute Covered Works, provided that You (i) inform + recipients how they can obtain a copy of this License; (ii) satisfy the + applicable conditions of sections 3 through 6; and (iii) preserve all + Legal Notices contained in My Work (to the extent they remain + pertinent). "Legal Notices" means copyright notices, license notices, + license texts, and author attributions, but does not include logos, + other graphical images, trademarks or trademark legends. + +3. Conditions for Distributing Derived Works; Outbound GPL Compatibility + + If You Distribute a Derived Work, You must license the entire Derived + Work as a whole under this License, with prominent notice of such + licensing. This condition may not be avoided through such means as + separate Distribution of portions of the Derived Work. + + If the Derived Work includes material licensed under the GPL, You may + instead license the Derived Work under the GPL. + +4. Condition Against Further Restrictions; Inbound License Compatibility + + When Distributing a Covered Work, You may not impose further + restrictions on the exercise of rights in the Covered Work granted under + this License. This condition is not excused merely because such + restrictions result from Your compliance with conditions or obligations + extrinsic to this License (such as a court order or an agreement with a + third party). + + However, You may Distribute a Covered Work incorporating material + governed by a license that is both OSI-Approved and FSF-Free as of the + release date of this License, provided that compliance with such + other license would not conflict with any conditions stated in other + sections of this License. + +5. Conditions for Distributing Object Code + + You may Distribute an Object Code form of a Covered Work, provided that + you accompany the Object Code with a URL through which the Corresponding + Source is made available, at no charge, by some standard or customary + means of providing network access to source code. + + If you Distribute the Object Code in a physical product or tangible + storage medium ("Product"), the Corresponding Source must be available + through such URL for two years from the date of Your most recent + Distribution of the Object Code in the Product. However, if the Product + itself contains or is accompanied by the Corresponding Source (made + available in a customarily accessible manner), You need not also comply + with the first paragraph of this section. + + Each direct and indirect recipient of the Covered Work from You is an + intended third-party beneficiary of this License solely as to this + section 5, with the right to enforce its terms. + +6. Symmetrical Licensing Condition for Upstream Contributions + + If You Distribute a work to Me specifically for inclusion in or + modification of a Covered Work (a "Patch"), and no explicit licensing + terms apply to the Patch, You license the Patch under this License, to + the extent of Your copyright in the Patch. This condition does not + negate the other conditions of this License, if applicable to the Patch. + +7. 
Nullification of Copyleft/Proprietary Dual Licensing + + If I offer to license, for a fee, a Covered Work under terms other than + a license that is OSI-Approved or FSF-Free as of the release date of this + License or a numbered version of copyleft-next released by the + Copyleft-Next Project, then the license I grant You under section 1 is no + longer subject to the conditions in sections 3 through 5. + +8. Copyleft Sunset + + The conditions in sections 3 through 5 no longer apply once fifteen + years have elapsed from the date of My first Distribution of My Work + under this License. + +9. Pass-Through + + When You Distribute a Covered Work, the recipient automatically receives + a license to My Work from Me, subject to the terms of this License. + +10. Termination + + Your license grants under section 1 are automatically terminated if You + + a) fail to comply with the conditions of this License, unless You cure + such noncompliance within thirty days after becoming aware of it, or + + b) initiate a patent infringement litigation claim (excluding + declaratory judgment actions, counterclaims, and cross-claims) + alleging that any part of My Work directly or indirectly infringes + any patent. + + Termination of Your license grants extends to all copies of Covered + Works You subsequently obtain. Termination does not terminate the + rights of those who have received copies or rights from You subject to + this License. + + To the extent permission to make copies of a Covered Work is necessary + merely for running it, such permission is not terminable. + +11. Later License Versions + + The Copyleft-Next Project may release new versions of copyleft-next, + designated by a distinguishing version number ("Later Versions"). + Unless I explicitly remove the option of Distributing Covered Works + under Later Versions, You may Distribute Covered Works under any Later + Version. + +** 12. No Warranty ** +** ** +** My Work is provided "as-is", without warranty. You bear the risk ** +** of using it. To the extent permitted by applicable law, each ** +** Distributor of My Work excludes the implied warranties of title, ** +** merchantability, fitness for a particular purpose and ** +** non-infringement. ** + +** 13. Limitation of Liability ** +** ** +** To the extent permitted by applicable law, in no event will any ** +** Distributor of My Work be liable to You for any damages ** +** whatsoever, whether direct, indirect, special, incidental, or ** +** consequential damages, whether arising under contract, tort ** +** (including negligence), or otherwise, even where the Distributor ** +** knew or should have known about the possibility of such damages. ** + +14. Severability + + The invalidity or unenforceability of any provision of this License + does not affect the validity or enforceability of the remainder of + this License. Such provision is to be reformed to the minimum extent + necessary to make it valid and enforceable. + +15. Definitions + + "Copyleft-Next Project" means the project that maintains the source + code repository at + as of the release date of this License. 
+ + "Corresponding Source" of a Covered Work in Object Code form means (i) + the Source Code form of the Covered Work; (ii) all scripts, + instructions and similar information that are reasonably necessary for + a skilled developer to generate such Object Code from the Source Code + provided under (i); and (iii) a list clearly identifying all Separate + Works (other than those provided in compliance with (ii)) that were + specifically used in building and (if applicable) installing the + Covered Work (for example, a specified proprietary compiler including + its version number). Corresponding Source must be machine-readable. + + "Covered Work" means My Work or a Derived Work. + + "Derived Work" means a work of authorship that copies from, modifies, + adapts, is based on, is a derivative work of, transforms, translates or + contains all or part of My Work, such that copyright permission is + required. The following are not Derived Works: (i) Mere Aggregation; + (ii) a mere reproduction of My Work; and (iii) if My Work fails to + explicitly state an expectation otherwise, a work that merely makes + reference to My Work. + + "Distribute" means to distribute, transfer or make a copy available to + someone else, such that copyright permission is required. + + "Distributor" means Me and anyone else who Distributes a Covered Work. + + "FSF-Free" means classified as 'free' by the Free Software Foundation. + + "GPL" means a version of the GNU General Public License or the GNU + Affero General Public License. + + "I"/"Me"/"My" refers to the individual or legal entity that places My + Work under this License. "You"/"Your" refers to the individual or legal + entity exercising rights in My Work under this License. A legal entity + includes each entity that controls, is controlled by, or is under + common control with such legal entity. "Control" means (a) the power to + direct the actions of such legal entity, whether by contract or + otherwise, or (b) ownership of more than fifty percent of the + outstanding shares or beneficial ownership of such legal entity. + + "Licensed Patents" means all patent claims licensable royalty-free by + Me, now or in the future, that are necessarily infringed by making, + using, or selling My Work, and excludes claims that would be infringed + only as a consequence of further modification of My Work. + + "Mere Aggregation" means an aggregation of a Covered Work with a + Separate Work. + + "My Work" means the particular work of authorship I license to You + under this License. + + "Object Code" means any form of a work that is not Source Code. + + "OSI-Approved" means approved as 'Open Source' by the Open Source + Initiative. + + "Separate Work" means a work that is separate from and independent of a + particular Covered Work and is not by its nature an extension or + enhancement of the Covered Work, and/or a runtime library, standard + library or similar component that is used to generate an Object Code + form of a Covered Work. + + "Source Code" means the preferred form of a work for making + modifications to it. -- GitLab From 6cad1ecd4e3213d892b70afa999a81849d1f0206 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 3 Oct 2022 09:58:49 -0700 Subject: [PATCH 150/694] testing: use the copyleft-next-0.3.1 SPDX tag Two selftests drivers exist under the copyleft-next license. These drivers were added prior to SPDX practice taking full swing in the kernel. Now that we have an SPDX tag for copyleft-next-0.3.1 documented, embrace it and remove the boiler plate. 
Cc: Goldwyn Rodrigues Cc: Kuno Woudt Cc: Richard Fontana Cc: copyleft-next@lists.fedorahosted.org Cc: Ciaran Farrell Cc: Christopher De Nicolo Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Thomas Gleixner Cc: Jonathan Corbet Cc: Thorsten Leemhuis Cc: Andrew Morton Reviewed-by: Kees Cook Signed-off-by: Luis Chamberlain Reviewed-by: Tim Bird Signed-off-by: Greg Kroah-Hartman --- lib/test_kmod.c | 12 +----------- lib/test_sysctl.c | 12 +----------- tools/testing/selftests/kmod/kmod.sh | 13 +------------ tools/testing/selftests/sysctl/sysctl.sh | 12 +----------- 4 files changed, 4 insertions(+), 45 deletions(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index cb800b1d0d99c..6423df9fa8dd4 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1,18 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 /* * kmod stress test driver * * Copyright (C) 2017 Luis R. Rodriguez - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or at your option any - * later version; or, when distributed separately from the Linux kernel or - * when incorporated into other software packages, subject to the following - * license: - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of copyleft-next (version 0.3.1 or later) as published - * at http://copyleft-next.org/. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 9a564971f539a..e2a816d85ea24 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -1,18 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 /* * proc sysctl test driver * * Copyright (C) 2017 Luis R. Rodriguez - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or at your option any - * later version; or, when distributed separately from the Linux kernel or - * when incorporated into other software packages, subject to the following - * license: - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of copyleft-next (version 0.3.1 or later) as published - * at http://copyleft-next.org/. */ /* diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index afd42387e8b21..7189715d7960f 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -1,18 +1,7 @@ #!/bin/bash -# +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 # Copyright (C) 2017 Luis R. Rodriguez # -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the Free -# Software Foundation; either version 2 of the License, or at your option any -# later version; or, when distributed separately from the Linux kernel or -# when incorporated into other software packages, subject to the following -# license: -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of copyleft-next (version 0.3.1 or later) as published -# at http://copyleft-next.org/. - # This is a stress test script for kmod, the kernel module loader. 
It uses # test_kmod which exposes a series of knobs for the API for us so we can # tweak each test in userspace rather than in kernelspace. diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index f50778a3d7445..bfc54b422f25a 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -1,16 +1,6 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 # Copyright (C) 2017 Luis R. Rodriguez -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the Free -# Software Foundation; either version 2 of the License, or at your option any -# later version; or, when distributed separately from the Linux kernel or -# when incorporated into other software packages, subject to the following -# license: -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of copyleft-next (version 0.3.1 or later) as published -# at http://copyleft-next.org/. # This performs a series tests against the proc sysctl interface. -- GitLab From e0767e391079687081c5564b1390983c36b49cd1 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 1 Nov 2022 10:35:20 +0800 Subject: [PATCH 151/694] soundwire: cadence: rename sdw_cdns_dai_dma_data as sdw_cdns_dai_runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing 'struct sdw_cdns_dma_data' has really nothing to do with DMAs. The information is stored in the dai->dma_data, but this is really private data that should be stored in a different context. Beyond the academic elegance discussion, using dma_data is a problem for new Intel hardware where the dma_data structure is already used for true DMA handling performed by other parts of the code. This patch prepares a transition away from the use of dma_data, for now with a rename-only change. 
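A condensed sketch of the rename (illustrative only, abridged from the real hunks; no new identifiers are introduced here): the type and the local variables change name, while the value still travels through the dai's dma_data pointers at this stage, and moving it into a driver-private array is left to the follow-up patch.

    /* before: private context typed and named as if it were DMA data */
    struct sdw_cdns_dma_data *dma = dai->playback_dma_data;

    /* after: same storage for now, clearer type and variable name */
    struct sdw_cdns_dai_runtime *dai_runtime = dai->playback_dma_data;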
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221101023521.2384586-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/cadence_master.c | 30 +++++----- drivers/soundwire/cadence_master.h | 4 +- drivers/soundwire/intel.c | 96 +++++++++++++++--------------- 3 files changed, 65 insertions(+), 65 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index 93929f19d0831..235617b0542fc 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -1707,40 +1707,40 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai, void *stream, int direction) { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; if (stream) { /* first paranoia check */ if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dma = dai->playback_dma_data; + dai_runtime = dai->playback_dma_data; else - dma = dai->capture_dma_data; + dai_runtime = dai->capture_dma_data; - if (dma) { + if (dai_runtime) { dev_err(dai->dev, - "dma_data already allocated for dai %s\n", + "dai_runtime already allocated for dai %s\n", dai->name); return -EINVAL; } - /* allocate and set dma info */ - dma = kzalloc(sizeof(*dma), GFP_KERNEL); - if (!dma) + /* allocate and set dai_runtime info */ + dai_runtime = kzalloc(sizeof(*dai_runtime), GFP_KERNEL); + if (!dai_runtime) return -ENOMEM; - dma->stream_type = SDW_STREAM_PCM; + dai_runtime->stream_type = SDW_STREAM_PCM; - dma->bus = &cdns->bus; - dma->link_id = cdns->instance; + dai_runtime->bus = &cdns->bus; + dai_runtime->link_id = cdns->instance; - dma->stream = stream; + dai_runtime->stream = stream; if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dai->playback_dma_data = dma; + dai->playback_dma_data = dai_runtime; else - dai->capture_dma_data = dma; + dai->capture_dma_data = dai_runtime; } else { - /* for NULL stream we release allocated dma_data */ + /* for NULL stream we release allocated dai_runtime */ if (direction == SNDRV_PCM_STREAM_PLAYBACK) { kfree(dai->playback_dma_data); dai->playback_dma_data = NULL; diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h index ca9e805bab88f..93f23bd46e2c2 100644 --- a/drivers/soundwire/cadence_master.h +++ b/drivers/soundwire/cadence_master.h @@ -70,7 +70,7 @@ struct sdw_cdns_stream_config { }; /** - * struct sdw_cdns_dma_data: Cadence DMA data + * struct sdw_cdns_dai_runtime: Cadence DAI runtime data * * @name: SoundWire stream name * @stream: stream runtime @@ -82,7 +82,7 @@ struct sdw_cdns_stream_config { * @suspended: status set when suspended, to be used in .prepare * @paused: status set in .trigger, to be used in suspend */ -struct sdw_cdns_dma_data { +struct sdw_cdns_dai_runtime { char *name; struct sdw_stream_runtime *stream; struct sdw_cdns_pdi *pdi; diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 244209358784f..1e9c6df4b62c2 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -824,15 +824,15 @@ static int intel_hw_params(struct snd_pcm_substream *substream, { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; struct sdw_cdns_pdi *pdi; struct sdw_stream_config sconfig; struct sdw_port_config *pconfig; int ch, dir; int ret; - dma = snd_soc_dai_get_dma_data(dai, substream); - if (!dma) + dai_runtime = 
snd_soc_dai_get_dma_data(dai, substream); + if (!dai_runtime) return -EIO; ch = params_channels(params); @@ -854,10 +854,10 @@ static int intel_hw_params(struct snd_pcm_substream *substream, sdw_cdns_config_stream(cdns, ch, dir, pdi); /* store pdi and hw_params, may be needed in prepare step */ - dma->paused = false; - dma->suspended = false; - dma->pdi = pdi; - dma->hw_params = params; + dai_runtime->paused = false; + dai_runtime->suspended = false; + dai_runtime->pdi = pdi; + dai_runtime->hw_params = params; /* Inform DSP about PDI stream number */ ret = intel_params_stream(sdw, substream->stream, dai, params, @@ -869,7 +869,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream, sconfig.direction = dir; sconfig.ch_count = ch; sconfig.frame_rate = params_rate(params); - sconfig.type = dma->stream_type; + sconfig.type = dai_runtime->stream_type; sconfig.bps = snd_pcm_format_width(params_format(params)); @@ -884,7 +884,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream, pconfig->ch_mask = (1 << ch) - 1; ret = sdw_stream_add_master(&cdns->bus, &sconfig, - pconfig, 1, dma->stream); + pconfig, 1, dai_runtime->stream); if (ret) dev_err(cdns->dev, "add master to stream failed:%d\n", ret); @@ -898,19 +898,19 @@ static int intel_prepare(struct snd_pcm_substream *substream, { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; int ch, dir; int ret = 0; - dma = snd_soc_dai_get_dma_data(dai, substream); - if (!dma) { - dev_err(dai->dev, "failed to get dma data in %s\n", + dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + if (!dai_runtime) { + dev_err(dai->dev, "failed to get dai runtime in %s\n", __func__); return -EIO; } - if (dma->suspended) { - dma->suspended = false; + if (dai_runtime->suspended) { + dai_runtime->suspended = false; /* * .prepare() is called after system resume, where we @@ -921,21 +921,21 @@ static int intel_prepare(struct snd_pcm_substream *substream, */ /* configure stream */ - ch = params_channels(dma->hw_params); + ch = params_channels(dai_runtime->hw_params); if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) dir = SDW_DATA_DIR_RX; else dir = SDW_DATA_DIR_TX; - intel_pdi_shim_configure(sdw, dma->pdi); - intel_pdi_alh_configure(sdw, dma->pdi); - sdw_cdns_config_stream(cdns, ch, dir, dma->pdi); + intel_pdi_shim_configure(sdw, dai_runtime->pdi); + intel_pdi_alh_configure(sdw, dai_runtime->pdi); + sdw_cdns_config_stream(cdns, ch, dir, dai_runtime->pdi); /* Inform DSP about PDI stream number */ ret = intel_params_stream(sdw, substream->stream, dai, - dma->hw_params, + dai_runtime->hw_params, sdw->instance, - dma->pdi->intel_alh_id); + dai_runtime->pdi->intel_alh_id); } return ret; @@ -946,11 +946,11 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; int ret; - dma = snd_soc_dai_get_dma_data(dai, substream); - if (!dma) + dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + if (!dai_runtime) return -EIO; /* @@ -959,10 +959,10 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) * DEPREPARED for the first cpu-dai and to RELEASED for the last * cpu-dai. 
*/ - ret = sdw_stream_remove_master(&cdns->bus, dma->stream); + ret = sdw_stream_remove_master(&cdns->bus, dai_runtime->stream); if (ret < 0) { dev_err(dai->dev, "remove master from stream %s failed: %d\n", - dma->stream->name, ret); + dai_runtime->stream->name, ret); return ret; } @@ -972,8 +972,8 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) return ret; } - dma->hw_params = NULL; - dma->pdi = NULL; + dai_runtime->hw_params = NULL; + dai_runtime->pdi = NULL; return 0; } @@ -996,17 +996,17 @@ static int intel_pcm_set_sdw_stream(struct snd_soc_dai *dai, static void *intel_get_sdw_stream(struct snd_soc_dai *dai, int direction) { - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dma = dai->playback_dma_data; + dai_runtime = dai->playback_dma_data; else - dma = dai->capture_dma_data; + dai_runtime = dai->capture_dma_data; - if (!dma) + if (!dai_runtime) return ERR_PTR(-EINVAL); - return dma->stream; + return dai_runtime->stream; } static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) @@ -1014,7 +1014,7 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_intel_link_res *res = sdw->link_res; - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; int ret = 0; /* @@ -1025,9 +1025,9 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn if (res->ops && res->ops->trigger) res->ops->trigger(dai, cmd, substream->stream); - dma = snd_soc_dai_get_dma_data(dai, substream); - if (!dma) { - dev_err(dai->dev, "failed to get dma data in %s\n", + dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + if (!dai_runtime) { + dev_err(dai->dev, "failed to get dai runtime in %s\n", __func__); return -EIO; } @@ -1042,17 +1042,17 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn * the .trigger callback is used to track the suspend case only. 
*/ - dma->suspended = true; + dai_runtime->suspended = true; ret = intel_free_stream(sdw, substream->stream, dai, sdw->instance); break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - dma->paused = true; + dai_runtime->paused = true; break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - dma->paused = false; + dai_runtime->paused = false; break; default: break; @@ -1091,25 +1091,25 @@ static int intel_component_dais_suspend(struct snd_soc_component *component) for_each_component_dais(component, dai) { struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_cdns_dma_data *dma; + struct sdw_cdns_dai_runtime *dai_runtime; int stream; int ret; - dma = dai->playback_dma_data; + dai_runtime = dai->playback_dma_data; stream = SNDRV_PCM_STREAM_PLAYBACK; - if (!dma) { - dma = dai->capture_dma_data; + if (!dai_runtime) { + dai_runtime = dai->capture_dma_data; stream = SNDRV_PCM_STREAM_CAPTURE; } - if (!dma) + if (!dai_runtime) continue; - if (dma->suspended) + if (dai_runtime->suspended) continue; - if (dma->paused) { - dma->suspended = true; + if (dai_runtime->paused) { + dai_runtime->suspended = true; ret = intel_free_stream(sdw, stream, dai, sdw->instance); if (ret < 0) -- GitLab From 7dddead766c0826a998e7053e7d1c92b3422f8d6 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 1 Nov 2022 10:35:21 +0800 Subject: [PATCH 152/694] soundwire: cadence: use dai_runtime_array instead of dma_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the code with a Cadence-specific dai_runtime_array, indexed with dai->id, instead of abusing dma_data. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221101023521.2384586-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/cadence_master.c | 30 +++++++++++++-------------- drivers/soundwire/cadence_master.h | 5 +++++ drivers/soundwire/intel.c | 33 +++++++++++++++--------------- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index 235617b0542fc..a1de363eba3ff 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -1709,13 +1709,10 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai, struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_cdns_dai_runtime *dai_runtime; + dai_runtime = cdns->dai_runtime_array[dai->id]; + if (stream) { /* first paranoia check */ - if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dai_runtime = dai->playback_dma_data; - else - dai_runtime = dai->capture_dma_data; - if (dai_runtime) { dev_err(dai->dev, "dai_runtime already allocated for dai %s\n", @@ -1734,20 +1731,21 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai, dai_runtime->link_id = cdns->instance; dai_runtime->stream = stream; + dai_runtime->direction = direction; - if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dai->playback_dma_data = dai_runtime; - else - dai->capture_dma_data = dai_runtime; + cdns->dai_runtime_array[dai->id] = dai_runtime; } else { - /* for NULL stream we release allocated dai_runtime */ - if (direction == SNDRV_PCM_STREAM_PLAYBACK) { - kfree(dai->playback_dma_data); - dai->playback_dma_data = NULL; - } else { - kfree(dai->capture_dma_data); - dai->capture_dma_data = NULL; + /* second paranoia check */ + if (!dai_runtime) { + dev_err(dai->dev, + "dai_runtime not allocated for dai %s\n", + dai->name); + 
return -EINVAL; } + + /* for NULL stream we release allocated dai_runtime */ + kfree(dai_runtime); + cdns->dai_runtime_array[dai->id] = NULL; } return 0; } diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h index 93f23bd46e2c2..0434d70d4b1f5 100644 --- a/drivers/soundwire/cadence_master.h +++ b/drivers/soundwire/cadence_master.h @@ -81,6 +81,7 @@ struct sdw_cdns_stream_config { * @hw_params: hw_params to be applied in .prepare step * @suspended: status set when suspended, to be used in .prepare * @paused: status set in .trigger, to be used in suspend + * @direction: stream direction */ struct sdw_cdns_dai_runtime { char *name; @@ -92,6 +93,7 @@ struct sdw_cdns_dai_runtime { struct snd_pcm_hw_params *hw_params; bool suspended; bool paused; + int direction; }; /** @@ -108,6 +110,7 @@ struct sdw_cdns_dai_runtime { * @registers: Cadence registers * @link_up: Link status * @msg_count: Messages sent on bus + * @dai_runtime_array: runtime context for each allocated DAI. */ struct sdw_cdns { struct device *dev; @@ -135,6 +138,8 @@ struct sdw_cdns { struct work_struct work; struct list_head list; + + struct sdw_cdns_dai_runtime **dai_runtime_array; }; #define bus_to_cdns(_bus) container_of(_bus, struct sdw_cdns, bus) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 1e9c6df4b62c2..e8855a2115f64 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -831,7 +831,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream, int ch, dir; int ret; - dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) return -EIO; @@ -902,7 +902,7 @@ static int intel_prepare(struct snd_pcm_substream *substream, int ch, dir; int ret = 0; - dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) { dev_err(dai->dev, "failed to get dai runtime in %s\n", __func__); @@ -949,7 +949,7 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) struct sdw_cdns_dai_runtime *dai_runtime; int ret; - dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) return -EIO; @@ -996,13 +996,10 @@ static int intel_pcm_set_sdw_stream(struct snd_soc_dai *dai, static void *intel_get_sdw_stream(struct snd_soc_dai *dai, int direction) { + struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_cdns_dai_runtime *dai_runtime; - if (direction == SNDRV_PCM_STREAM_PLAYBACK) - dai_runtime = dai->playback_dma_data; - else - dai_runtime = dai->capture_dma_data; - + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) return ERR_PTR(-EINVAL); @@ -1025,7 +1022,7 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn if (res->ops && res->ops->trigger) res->ops->trigger(dai, cmd, substream->stream); - dai_runtime = snd_soc_dai_get_dma_data(dai, substream); + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) { dev_err(dai->dev, "failed to get dai runtime in %s\n", __func__); @@ -1092,15 +1089,9 @@ static int intel_component_dais_suspend(struct snd_soc_component *component) struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_cdns_dai_runtime *dai_runtime; - int stream; int ret; - dai_runtime = dai->playback_dma_data; - stream = SNDRV_PCM_STREAM_PLAYBACK; - if (!dai_runtime) { - dai_runtime = dai->capture_dma_data; - stream = 
SNDRV_PCM_STREAM_CAPTURE; - } + dai_runtime = cdns->dai_runtime_array[dai->id]; if (!dai_runtime) continue; @@ -1111,7 +1102,7 @@ static int intel_component_dais_suspend(struct snd_soc_component *component) if (dai_runtime->paused) { dai_runtime->suspended = true; - ret = intel_free_stream(sdw, stream, dai, sdw->instance); + ret = intel_free_stream(sdw, dai_runtime->direction, dai, sdw->instance); if (ret < 0) return ret; } @@ -1178,6 +1169,7 @@ static int intel_create_dai(struct sdw_cdns *cdns, static int intel_register_dai(struct sdw_intel *sdw) { + struct sdw_cdns_dai_runtime **dai_runtime_array; struct sdw_cdns_stream_config config; struct sdw_cdns *cdns = &sdw->cdns; struct sdw_cdns_streams *stream; @@ -1195,6 +1187,13 @@ static int intel_register_dai(struct sdw_intel *sdw) /* DAIs are created based on total number of PDIs supported */ num_dai = cdns->pcm.num_pdi; + dai_runtime_array = devm_kcalloc(cdns->dev, num_dai, + sizeof(struct sdw_cdns_dai_runtime *), + GFP_KERNEL); + if (!dai_runtime_array) + return -ENOMEM; + cdns->dai_runtime_array = dai_runtime_array; + dais = devm_kcalloc(cdns->dev, num_dai, sizeof(*dais), GFP_KERNEL); if (!dais) return -ENOMEM; -- GitLab From febc50b82bc95089ef1d6f68a101c8a2b701e9ce Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Tue, 8 Nov 2022 20:16:03 +0530 Subject: [PATCH 153/694] dt-bindings: soundwire: Convert text bindings to DT Schema Convert soundwire text bindings to DT Schema format. Update interrupt property items as per device tree, as it is not appropriately described in text file. Update some of the properties description with minimum and maximum range. Update secondary node info which is used to describe slave devices. Signed-off-by: Srinivasa Rao Mandadapu Co-developed-by: Ratna Deepthi Kudaravalli Signed-off-by: Ratna Deepthi Kudaravalli Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/1667918763-32445-5-git-send-email-quic_srivasam@quicinc.com Signed-off-by: Vinod Koul --- .../bindings/soundwire/qcom,sdw.txt | 215 -------------- .../bindings/soundwire/qcom,soundwire.yaml | 270 ++++++++++++++++++ 2 files changed, 270 insertions(+), 215 deletions(-) delete mode 100644 Documentation/devicetree/bindings/soundwire/qcom,sdw.txt create mode 100644 Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml diff --git a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt deleted file mode 100644 index e0faed8dceac7..0000000000000 --- a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt +++ /dev/null @@ -1,215 +0,0 @@ -Qualcomm SoundWire Controller Bindings - - -This binding describes the Qualcomm SoundWire Controller along with its -board specific bus parameters. - -- compatible: - Usage: required - Value type: - Definition: must be "qcom,soundwire-v..", - Example: - "qcom,soundwire-v1.3.0" - "qcom,soundwire-v1.5.0" - "qcom,soundwire-v1.5.1" - "qcom,soundwire-v1.6.0" - "qcom,soundwire-v1.7.0" -- reg: - Usage: required - Value type: - Definition: the base address and size of SoundWire controller - address space. - -- interrupts: - Usage: required - Value type: - Definition: should specify the SoundWire Controller core and optional - wake IRQ - -- interrupt-names: - Usage: Optional - Value type: boolean - Value type: - Definition: should be "core" for core and "wakeup" for wake interrupt. - -- wakeup-source: - Usage: Optional - Value type: boolean - Definition: should specify if SoundWire Controller is wake up capable. 
- -- clock-names: - Usage: required - Value type: - Definition: should be "iface" for SoundWire Controller interface clock - -- clocks: - Usage: required - Value type: - Definition: should specify the SoundWire Controller interface clock - -- #sound-dai-cells: - Usage: required - Value type: - Definition: must be 1 for digital audio interfaces on the controller. - -- qcom,dout-ports: - Usage: required - Value type: - Definition: must be count of data out ports - -- qcom,din-ports: - Usage: required - Value type: - Definition: must be count of data in ports - -- qcom,ports-offset1: - Usage: required - Value type: - Definition: should specify payload transport window offset1 of each - data port. Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-offset2: - Usage: required - Value type: - Definition: should specify payload transport window offset2 of each - data port. Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-sinterval-low: - Usage: required - Value type: - Definition: should be sample interval low of each data port. - Out ports followed by In ports. Used for Sample Interval - calculation. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-word-length: - Usage: optional - Value type: - Definition: should be size of payload channel sample. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-block-pack-mode: - Usage: optional - Value type: - Definition: should be 0 or 1 to indicate the block packing mode. - 0 to indicate Blocks are per Channel - 1 to indicate Blocks are per Port. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-block-group-count: - Usage: optional - Value type: - Definition: should be in range 1 to 4 to indicate how many sample - intervals are combined into a payload. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-lane-control: - Usage: optional - Value type: - Definition: should be in range 0 to 7 to identify which data lane - the data port uses. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,ports-hstart: - Usage: optional - Value type: - Definition: should be number identifying lowerst numbered coloum in - SoundWire Frame, i.e. left edge of the Transport sub-frame - for each port. Values between 0 and 15 are valid. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. 
- -- qcom,ports-hstop: - Usage: optional - Value type: - Definition: should be number identifying highest numbered coloum in - SoundWire Frame, i.e. the right edge of the Transport - sub-frame for each port. Values between 0 and 15 are valid. - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- qcom,dports-type: - Usage: optional - Value type: - Definition: should be one of the following types - 0 for reduced port - 1 for simple ports - 2 for full port - Out ports followed by In ports. - Value of 0xFF indicates that this option is not implemented - or applicable for the respective data port. - More info in MIPI Alliance SoundWire 1.0 Specifications. - -- reset: - Usage: optional - Value type: - Definition: Should specify the SoundWire audio CSR reset controller interface, - which is required for SoundWire version 1.6.0 and above. - -- reset-names: - Usage: optional - Value type: - Definition: should be "swr_audio_cgcr" for SoundWire audio CSR reset - controller interface. - -Note: - More Information on detail of encoding of these fields can be -found in MIPI Alliance SoundWire 1.0 Specifications. - -= SoundWire devices -Each subnode of the bus represents SoundWire device attached to it. -The properties of these nodes are defined by the individual bindings. - -= EXAMPLE -The following example represents a SoundWire controller on DB845c board -which has controller integrated inside WCD934x codec on SDM845 SoC. - -soundwire: soundwire@c85 { - compatible = "qcom,soundwire-v1.3.0"; - reg = <0xc85 0x20>; - interrupts = <20 IRQ_TYPE_EDGE_RISING>; - clocks = <&wcc>; - clock-names = "iface"; - resets = <&lpass_audiocc LPASS_AUDIO_SWR_TX_CGCR>; - reset-names = "swr_audio_cgcr"; - #sound-dai-cells = <1>; - qcom,dports-type = <0>; - qcom,dout-ports = <6>; - qcom,din-ports = <2>; - qcom,ports-sinterval-low = /bits/ 8 <0x07 0x1F 0x3F 0x7 0x1F 0x3F 0x0F 0x0F>; - qcom,ports-offset1 = /bits/ 8 <0x01 0x02 0x0C 0x6 0x12 0x0D 0x07 0x0A >; - qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x1F 0x00 0x00 0x1F 0x00 0x00>; - - /* Left Speaker */ - left{ - .... - }; - - /* Right Speaker */ - right{ - .... - }; -}; diff --git a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml new file mode 100644 index 0000000000000..bcbfa71536cda --- /dev/null +++ b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml @@ -0,0 +1,270 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/soundwire/qcom,soundwire.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SoundWire Controller + +maintainers: + - Srinivas Kandagatla + - Srinivasa Rao Mandadapu + +description: + The Qualcomm SoundWire controller along with its board specific bus parameters. + +properties: + compatible: + enum: + - qcom,soundwire-v1.3.0 + - qcom,soundwire-v1.5.0 + - qcom,soundwire-v1.5.1 + - qcom,soundwire-v1.6.0 + - qcom,soundwire-v1.7.0 + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + items: + - description: specify the SoundWire controller core. + - description: specify the Soundwire controller wake IRQ. 
+ + interrupt-names: + minItems: 1 + items: + - const: core + - const: wakeup + + clocks: + items: + - description: iface clock + + clock-names: + items: + - const: iface + + resets: + items: + - description: SWR_AUDIO_CGCR RESET + + reset-names: + items: + - const: swr_audio_cgcr + + '#sound-dai-cells': + const: 1 + + '#address-cells': + const: 2 + + '#size-cells': + const: 0 + + wakeup-source: true + + qcom,din-ports: + $ref: /schemas/types.yaml#/definitions/uint32 + description: count of data in ports + + qcom,dout-ports: + $ref: /schemas/types.yaml#/definitions/uint32 + description: count of data out ports + + qcom,ports-word-length: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Size of payload channel sample. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 5 + + qcom,ports-sinterval-low: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Sample interval low of each data port. + Out ports followed by In ports. Used for Sample Interval calculation. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 8 + + qcom,ports-offset1: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Payload transport window offset1 of each data port. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 8 + + qcom,ports-offset2: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Payload transport window offset2 of each data port. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 8 + + qcom,ports-lane-control: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Identify which data lane the data port uses. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 5 + + qcom,ports-block-pack-mode: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Indicate the block packing mode. + 0 to indicate Blocks are per Channel + 1 to indicate Blocks are per Port. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 8 + items: + oneOf: + - minimum: 0 + maximum: 1 + - const: 0xff + + qcom,ports-hstart: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Identifying lowerst numbered coloum in SoundWire Frame, + i.e. left edge of the Transport sub-frame for each port. + Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. 
+ minItems: 3 + maxItems: 5 + items: + oneOf: + - minimum: 0 + maximum: 15 + - const: 0xff + + qcom,ports-hstop: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + Identifying highest numbered coloum in SoundWire Frame, + i.e. the right edge of the Transport + sub-frame for each port. Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 5 + items: + oneOf: + - minimum: 0 + maximum: 15 + - const: 0xff + + qcom,ports-block-group-count: + $ref: /schemas/types.yaml#/definitions/uint8-array + description: + In range 1 to 4 to indicate how many sample intervals are combined + into a payload. Out ports followed by In ports. + Value of 0xff indicates that this option is not implemented + or applicable for the respective data port. + More info in MIPI Alliance SoundWire 1.0 Specifications. + minItems: 3 + maxItems: 5 + items: + oneOf: + - minimum: 0 + maximum: 4 + - const: 0xff + + label: + maxItems: 1 + +patternProperties: + "^.*@[0-9a-f],[0-9a-f]$": + type: object + description: + Child nodes for a standalone audio codec or speaker amplifier IC. + It has RX and TX Soundwire secondary devices. + properties: + compatible: + pattern: "^sdw[0-9a-f]{1}[0-9a-f]{4}[0-9a-f]{4}[0-9a-f]{2}$" + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - '#sound-dai-cells' + - '#address-cells' + - '#size-cells' + - qcom,dout-ports + - qcom,din-ports + - qcom,ports-sinterval-low + - qcom,ports-offset1 + - qcom,ports-offset2 + +additionalProperties: false + +examples: + - | + #include + #include + #include + + soundwire@3210000 { + compatible = "qcom,soundwire-v1.6.0"; + reg = <0x03210000 0x2000>; + + interrupts = , + <&pdc 130 IRQ_TYPE_LEVEL_HIGH>; + + interrupt-names = "core", "wakeup"; + + clocks = <&lpass_rx_macro>; + clock-names = "iface"; + + qcom,din-ports = <0>; + qcom,dout-ports = <5>; + + resets = <&lpass_audiocc LPASS_AUDIO_SWR_RX_CGCR>; + reset-names = "swr_audio_cgcr"; + + qcom,ports-word-length = /bits/ 8 <0x01 0x07 0x04 0xff 0xff>; + qcom,ports-sinterval-low = /bits/ 8 <0x03 0x3f 0x1f 0x03 0x03>; + qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0b 0x01 0x01>; + qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0b 0x00 0x00>; + qcom,ports-lane-control = /bits/ 8 <0x01 0x00 0x00 0x00 0x00>; + qcom,ports-block-pack-mode = /bits/ 8 <0xff 0x00 0x01 0xff 0xff>; + qcom,ports-hstart = /bits/ 8 <0xff 0x03 0xff 0xff 0xff>; + qcom,ports-hstop = /bits/ 8 <0xff 0x06 0xff 0xff 0xff>; + qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0x00>; + + #sound-dai-cells = <1>; + #address-cells = <2>; + #size-cells = <0>; + + codec@0,4 { + compatible = "sdw20217010d00"; + reg = <0 4>; + qcom,rx-port-mapping = <1 2 3 4 5>; + }; + }; -- GitLab From defbab270d45e32b068e7e73c3567232d745c60f Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 27 Sep 2022 14:52:56 -0700 Subject: [PATCH 154/694] include/uapi/linux/swab: Fix potentially missing __always_inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bc27fb68aaad ("include/uapi/linux/byteorder, swab: force inlining of some byteswap operations") added __always_inline to swab functions and commit 283d75737837 ("uapi/linux/stddef.h: Provide __always_inline to userspace headers") added a definition of __always_inline for use in exported headers when the kernel's compiler.h is not available. 
However, since swab.h does not include stddef.h, if the header soup does not indirectly include it, the definition of __always_inline is missing, resulting in a compilation failure, which was observed compiling the perf tool using exported headers containing this commit: In file included from /usr/include/linux/byteorder/little_endian.h:12:0, from /usr/include/asm/byteorder.h:14, from tools/include/uapi/linux/perf_event.h:20, from perf.h:8, from builtin-bench.c:18: /usr/include/linux/swab.h:160:8: error: unknown type name `__always_inline' static __always_inline __u16 __swab16p(const __u16 *p) Fix this by replacing the inclusion of linux/compiler.h with linux/stddef.h to ensure that we pick up that definition if required, without relying on it's indirect inclusion. compiler.h is then included indirectly, via stddef.h. Fixes: 283d75737837 ("uapi/linux/stddef.h: Provide __always_inline to userspace headers") Signed-off-by: Matt Redfearn Signed-off-by: Florian Fainelli Signed-off-by: Arnd Bergmann Tested-by: Nathan Chancellor Reviewed-by: Petr Vaněk Signed-off-by: Arnd Bergmann --- include/uapi/linux/swab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 0723a9cce747c..01717181339eb 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -3,7 +3,7 @@ #define _UAPI_LINUX_SWAB_H #include -#include +#include #include #include -- GitLab From 253b642eec936974cc709ebb6bed83786c391279 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:24 +0100 Subject: [PATCH 155/694] phy: qcom-qmp-pcie: sort device-id table Sort the device-id table by compatible string to make it easier to find and add new entries. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 7c81667dd9680..4e5111d19692c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2282,17 +2282,17 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, static const struct of_device_id qmp_pcie_of_match_table[] = { { - .compatible = "qcom,msm8998-qmp-pcie-phy", - .data = &msm8998_pciephy_cfg, - }, { - .compatible = "qcom,ipq8074-qmp-pcie-phy", - .data = &ipq8074_pciephy_cfg, + .compatible = "qcom,ipq6018-qmp-pcie-phy", + .data = &ipq6018_pciephy_cfg, }, { .compatible = "qcom,ipq8074-qmp-gen3-pcie-phy", .data = &ipq8074_pciephy_gen3_cfg, }, { - .compatible = "qcom,ipq6018-qmp-pcie-phy", - .data = &ipq6018_pciephy_cfg, + .compatible = "qcom,ipq8074-qmp-pcie-phy", + .data = &ipq8074_pciephy_cfg, + }, { + .compatible = "qcom,msm8998-qmp-pcie-phy", + .data = &msm8998_pciephy_cfg, }, { .compatible = "qcom,sc8180x-qmp-pcie-phy", .data = &sc8180x_pciephy_cfg, @@ -2302,6 +2302,9 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sdm845-qmp-pcie-phy", .data = &sdm845_qmp_pciephy_cfg, + }, { + .compatible = "qcom,sdx55-qmp-pcie-phy", + .data = &sdx55_qmp_pciephy_cfg, }, { .compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy", .data = &sm8250_qmp_gen3x1_pciephy_cfg, @@ -2311,9 +2314,6 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sm8250-qmp-modem-pcie-phy", .data = 
&sm8250_qmp_gen3x2_pciephy_cfg, - }, { - .compatible = "qcom,sdx55-qmp-pcie-phy", - .data = &sdx55_qmp_pciephy_cfg, }, { .compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy", .data = &sm8450_qmp_gen3x1_pciephy_cfg, -- GitLab From cebc6ca76e400a90cb7cbc9f96f26966167f5b6f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:25 +0100 Subject: [PATCH 156/694] phy: qcom-qmp-pcie: move device-id table Move the device-id table below probe() and next to the driver structure to keep the driver callback functions grouped together. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 90 ++++++++++++------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 4e5111d19692c..e66f6adc404ba 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2280,51 +2280,6 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct of_device_id qmp_pcie_of_match_table[] = { - { - .compatible = "qcom,ipq6018-qmp-pcie-phy", - .data = &ipq6018_pciephy_cfg, - }, { - .compatible = "qcom,ipq8074-qmp-gen3-pcie-phy", - .data = &ipq8074_pciephy_gen3_cfg, - }, { - .compatible = "qcom,ipq8074-qmp-pcie-phy", - .data = &ipq8074_pciephy_cfg, - }, { - .compatible = "qcom,msm8998-qmp-pcie-phy", - .data = &msm8998_pciephy_cfg, - }, { - .compatible = "qcom,sc8180x-qmp-pcie-phy", - .data = &sc8180x_pciephy_cfg, - }, { - .compatible = "qcom,sdm845-qhp-pcie-phy", - .data = &sdm845_qhp_pciephy_cfg, - }, { - .compatible = "qcom,sdm845-qmp-pcie-phy", - .data = &sdm845_qmp_pciephy_cfg, - }, { - .compatible = "qcom,sdx55-qmp-pcie-phy", - .data = &sdx55_qmp_pciephy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy", - .data = &sm8250_qmp_gen3x1_pciephy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-gen3x2-pcie-phy", - .data = &sm8250_qmp_gen3x2_pciephy_cfg, - }, { - .compatible = "qcom,sm8250-qmp-modem-pcie-phy", - .data = &sm8250_qmp_gen3x2_pciephy_cfg, - }, { - .compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy", - .data = &sm8450_qmp_gen3x1_pciephy_cfg, - }, { - .compatible = "qcom,sm8450-qmp-gen4x2-pcie-phy", - .data = &sm8450_qmp_gen4x2_pciephy_cfg, - }, - { }, -}; -MODULE_DEVICE_TABLE(of, qmp_pcie_of_match_table); - static int qmp_pcie_probe(struct platform_device *pdev) { struct qcom_qmp *qmp; @@ -2408,6 +2363,51 @@ static int qmp_pcie_probe(struct platform_device *pdev) return ret; } +static const struct of_device_id qmp_pcie_of_match_table[] = { + { + .compatible = "qcom,ipq6018-qmp-pcie-phy", + .data = &ipq6018_pciephy_cfg, + }, { + .compatible = "qcom,ipq8074-qmp-gen3-pcie-phy", + .data = &ipq8074_pciephy_gen3_cfg, + }, { + .compatible = "qcom,ipq8074-qmp-pcie-phy", + .data = &ipq8074_pciephy_cfg, + }, { + .compatible = "qcom,msm8998-qmp-pcie-phy", + .data = &msm8998_pciephy_cfg, + }, { + .compatible = "qcom,sc8180x-qmp-pcie-phy", + .data = &sc8180x_pciephy_cfg, + }, { + .compatible = "qcom,sdm845-qhp-pcie-phy", + .data = &sdm845_qhp_pciephy_cfg, + }, { + .compatible = "qcom,sdm845-qmp-pcie-phy", + .data = &sdm845_qmp_pciephy_cfg, + }, { + .compatible = "qcom,sdx55-qmp-pcie-phy", + .data = &sdx55_qmp_pciephy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy", + .data = &sm8250_qmp_gen3x1_pciephy_cfg, + }, { + .compatible = 
"qcom,sm8250-qmp-gen3x2-pcie-phy", + .data = &sm8250_qmp_gen3x2_pciephy_cfg, + }, { + .compatible = "qcom,sm8250-qmp-modem-pcie-phy", + .data = &sm8250_qmp_gen3x2_pciephy_cfg, + }, { + .compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy", + .data = &sm8450_qmp_gen3x1_pciephy_cfg, + }, { + .compatible = "qcom,sm8450-qmp-gen4x2-pcie-phy", + .data = &sm8450_qmp_gen4x2_pciephy_cfg, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, qmp_pcie_of_match_table); + static struct platform_driver qmp_pcie_driver = { .probe = qmp_pcie_probe, .driver = { -- GitLab From 2fdedef3ea8e8a1a68a1da6eee1537074b308f63 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:26 +0100 Subject: [PATCH 157/694] phy: qcom-qmp-pcie: merge driver data The PCIe QMP PHY driver only manages a single PHY so merge the old qcom_qmp and qmp_phy structures and drop the PHY array. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 218 +++++++++-------------- 1 file changed, 88 insertions(+), 130 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index e66f6adc404ba..667a87e7c9179 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1365,56 +1365,26 @@ struct qmp_phy_cfg { unsigned long pipe_clock_rate; }; -/** - * struct qmp_phy - per-lane phy descriptor - * - * @phy: generic phy - * @cfg: phy specific configuration - * @serdes: iomapped memory space for phy's serdes (i.e. PLL) - * @tx: iomapped memory space for lane's tx - * @rx: iomapped memory space for lane's rx - * @pcs: iomapped memory space for lane's pcs - * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs) - * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs) - * @pcs_misc: iomapped memory space for lane's pcs_misc - * @pipe_clk: pipe clock - * @qmp: QMP phy to which this lane belongs - * @mode: currently selected PHY mode - */ -struct qmp_phy { - struct phy *phy; +struct qmp_pcie { + struct device *dev; + const struct qmp_phy_cfg *cfg; + void __iomem *serdes; + void __iomem *pcs; + void __iomem *pcs_misc; void __iomem *tx; void __iomem *rx; - void __iomem *pcs; void __iomem *tx2; void __iomem *rx2; - void __iomem *pcs_misc; - struct clk *pipe_clk; - struct qcom_qmp *qmp; - int mode; -}; - -/** - * struct qcom_qmp - structure holding QMP phy block attributes - * - * @dev: device - * - * @clks: array of clocks required by phy - * @resets: array of resets required by phy - * @vregs: regulator supplies bulk data - * - * @phys: array of per-lane phy descriptors - */ -struct qcom_qmp { - struct device *dev; + struct clk *pipe_clk; struct clk_bulk_data *clks; struct reset_control_bulk_data *resets; struct regulator_bulk_data *vregs; - struct qmp_phy **phys; + struct phy *phy; + int mode; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -1850,9 +1820,9 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, tbl, num, 0xff); } -static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) { - void __iomem *serdes = qphy->serdes; + void __iomem *serdes = qmp->serdes; if (!tables) return; @@ -1860,11 +1830,11 @@ static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_ 
qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num); } -static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; if (!tables) return; @@ -1872,17 +1842,17 @@ static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_t qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1); if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qphy->tx2, tables->tx, tables->tx_num, 2); + qmp_pcie_configure_lane(qmp->tx2, tables->tx, tables->tx_num, 2); qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1); if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qphy->rx2, tables->rx, tables->rx_num, 2); + qmp_pcie_configure_lane(qmp->rx2, tables->rx, tables->rx_num, 2); } -static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) { - void __iomem *pcs = qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + void __iomem *pcs = qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; if (!tables) return; @@ -1893,9 +1863,8 @@ static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tab static int qmp_pcie_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_pcie *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret; ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs); @@ -1932,9 +1901,8 @@ static int qmp_pcie_init(struct phy *phy) static int qmp_pcie_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_pcie *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; reset_control_bulk_assert(cfg->num_resets, qmp->resets); @@ -1947,11 +1915,10 @@ static int qmp_pcie_exit(struct phy *phy) static int qmp_pcie_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_pcie *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; const struct qmp_phy_cfg_tables *mode_tables; - void __iomem *pcs = qphy->pcs; + void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int mask, val; int ret; @@ -1959,26 +1926,26 @@ static int qmp_pcie_power_on(struct phy *phy) qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], cfg->pwrdn_ctrl); - if (qphy->mode == PHY_MODE_PCIE_RC) + if (qmp->mode == PHY_MODE_PCIE_RC) mode_tables = cfg->tables_rc; else mode_tables = cfg->tables_ep; - qmp_pcie_serdes_init(qphy, &cfg->tables); - qmp_pcie_serdes_init(qphy, mode_tables); + qmp_pcie_serdes_init(qmp, &cfg->tables); + qmp_pcie_serdes_init(qmp, mode_tables); - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret); return ret; } /* Tx, Rx, and PCS configurations */ - qmp_pcie_lanes_init(qphy, &cfg->tables); - qmp_pcie_lanes_init(qphy, mode_tables); + qmp_pcie_lanes_init(qmp, &cfg->tables); + qmp_pcie_lanes_init(qmp, mode_tables); - 
qmp_pcie_pcs_init(qphy, &cfg->tables); - qmp_pcie_pcs_init(qphy, mode_tables); + qmp_pcie_pcs_init(qmp, &cfg->tables); + qmp_pcie_pcs_init(qmp, mode_tables); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); @@ -2001,27 +1968,27 @@ static int qmp_pcie_power_on(struct phy *phy) return 0; err_disable_pipe_clk: - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); return ret; } static int qmp_pcie_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_pcie *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); /* PHY reset */ - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], cfg->pwrdn_ctrl); return 0; @@ -2055,12 +2022,12 @@ static int qmp_pcie_disable(struct phy *phy) static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_pcie *qmp = phy_get_drvdata(phy); switch (submode) { case PHY_MODE_PCIE_RC: case PHY_MODE_PCIE_EP: - qphy->mode = submode; + qmp->mode = submode; break; default: dev_err(&phy->dev, "Unsupported submode %d\n", submode); @@ -2072,7 +2039,7 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_pcie *qmp = dev_get_drvdata(dev); int num = cfg->num_vregs; int i; @@ -2088,7 +2055,7 @@ static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_pcie *qmp = dev_get_drvdata(dev); int i; int ret; @@ -2109,7 +2076,7 @@ static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg static int qmp_pcie_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_pcie *qmp = dev_get_drvdata(dev); int num = cfg->num_clks; int i; @@ -2146,7 +2113,7 @@ static void phy_clk_release_provider(void *res) * clk | +-------+ | +-----+ * +---------------+ */ -static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) +static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) { struct clk_fixed_rate *fixed; struct clk_init_data init = { }; @@ -2168,8 +2135,8 @@ static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) * Controllers using QMP PHY-s use 125MHz pipe clock interface * unless other frequency is specified in the PHY config. 
*/ - if (qmp->phys[0]->cfg->pipe_clock_rate) - fixed->fixed_rate = qmp->phys[0]->cfg->pipe_clock_rate; + if (qmp->cfg->pipe_clock_rate) + fixed->fixed_rate = qmp->cfg->pipe_clock_rate; else fixed->fixed_rate = 125000000; @@ -2197,97 +2164,92 @@ static const struct phy_ops qmp_pcie_ops = { .owner = THIS_MODULE, }; -static int qmp_pcie_create(struct device *dev, struct device_node *np, int id, +static int qmp_pcie_create(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_pcie *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; int ret; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; + qmp->mode = PHY_MODE_PCIE_RC; - qphy->mode = PHY_MODE_PCIE_RC; + qmp->cfg = cfg; + qmp->serdes = serdes; - qphy->cfg = cfg; - qphy->serdes = serdes; /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. */ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qmp->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->tx)) + return PTR_ERR(qmp->tx); if (of_device_is_compatible(dev->of_node, "qcom,sdm845-qhp-pcie-phy")) - qphy->rx = qphy->tx; + qmp->rx = qmp->tx; else - qphy->rx = devm_of_iomap(dev, np, 1, NULL); - if (IS_ERR(qphy->rx)) - return PTR_ERR(qphy->rx); + qmp->rx = devm_of_iomap(dev, np, 1, NULL); + if (IS_ERR(qmp->rx)) + return PTR_ERR(qmp->rx); - qphy->pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qmp->pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->pcs)) + return PTR_ERR(qmp->pcs); if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qmp->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->tx2)) + return PTR_ERR(qmp->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); + qmp->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qmp->rx2)) + return PTR_ERR(qmp->rx2); - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL); } else { - qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); + qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL); } - if (IS_ERR(qphy->pcs_misc) && + if (IS_ERR(qmp->pcs_misc) && of_device_is_compatible(dev->of_node, "qcom,ipq6018-qmp-pcie-phy")) - qphy->pcs_misc = qphy->pcs + 0x400; + qmp->pcs_misc = qmp->pcs + 0x400; - if (IS_ERR(qphy->pcs_misc)) { + if (IS_ERR(qmp->pcs_misc)) { if (cfg->tables.pcs_misc || (cfg->tables_rc && cfg->tables_rc->pcs_misc) || - (cfg->tables_ep && cfg->tables_ep->pcs_misc)) - return PTR_ERR(qphy->pcs_misc); + (cfg->tables_ep && cfg->tables_ep->pcs_misc)) { + return PTR_ERR(qmp->pcs_misc); + } } - qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL); - if (IS_ERR(qphy->pipe_clk)) { - return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), - "failed to get lane%d pipe clock\n", id); + qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get pipe clock\n"); } generic_phy = devm_phy_create(dev, np, &qmp_pcie_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create qphy %d\n", ret); + dev_err(dev, "failed to create PHY: %d\n", 
ret); return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } static int qmp_pcie_probe(struct platform_device *pdev) { - struct qcom_qmp *qmp; struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; + struct qmp_pcie *qmp; int num, id; int ret; @@ -2326,14 +2288,10 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (num > 1) return -EINVAL; - qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL); - if (!qmp->phys) - return -ENOMEM; - id = 0; for_each_available_child_of_node(dev->of_node, child) { /* Create per-lane phy */ - ret = qmp_pcie_create(dev, child, id, serdes, cfg); + ret = qmp_pcie_create(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); -- GitLab From 393ed5d515494250712dd1703418bb541765afe3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:27 +0100 Subject: [PATCH 158/694] phy: qcom-qmp-pcie: clean up device-tree parsing Since the QMP driver split there will be at most a single child node so drop the obsolete iteration construct. While at it, drop the verbose error logging that would have been printed also on probe deferrals. Note that there's no need to check if there are additional child nodes (the kernel is not a devicetree validator), but let's return an error if there are no child nodes at all for now. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 34 +++++++----------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 667a87e7c9179..bc96518ad6b0e 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2250,7 +2250,6 @@ static int qmp_pcie_probe(struct platform_device *pdev) void __iomem *serdes; const struct qmp_phy_cfg *cfg = NULL; struct qmp_pcie *qmp; - int num, id; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -2283,34 +2282,19 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (ret) return ret; - num = of_get_available_child_count(dev->of_node); - /* do we have a rogue child node ? */ - if (num > 1) + child = of_get_next_available_child(dev->of_node, NULL); + if (!child) return -EINVAL; - id = 0; - for_each_available_child_of_node(dev->of_node, child) { - /* Create per-lane phy */ - ret = qmp_pcie_create(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } + ret = qmp_pcie_create(dev, child, serdes, cfg); + if (ret) + goto err_node_put; - /* - * Register the pipe clock provided by phy. - * See function description to see details of this pipe clock. 
- */ - ret = phy_pipe_clk_register(qmp, child); - if (ret) { - dev_err(qmp->dev, - "failed to register pipe clock source\n"); - goto err_node_put; - } + ret = phy_pipe_clk_register(qmp, child); + if (ret) + goto err_node_put; - id++; - } + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); -- GitLab From 52b997732eb6bf64df242403c7510520d89be266 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:28 +0100 Subject: [PATCH 159/694] phy: qcom-qmp-pcie: clean up probe initialisation Stop abusing the driver data pointer and instead pass the driver state structure directly to the initialisation helpers during probe. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 49 +++++++++++------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index bc96518ad6b0e..e30cbc94cbf6b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2037,9 +2037,10 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } -static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_pcie_vreg_init(struct qmp_pcie *qmp) { - struct qmp_pcie *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_vregs; int i; @@ -2053,9 +2054,10 @@ static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) return devm_regulator_bulk_get(dev, num, qmp->vregs); } -static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_pcie_reset_init(struct qmp_pcie *qmp) { - struct qmp_pcie *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int i; int ret; @@ -2074,9 +2076,10 @@ static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg return 0; } -static int qmp_pcie_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_pcie_clk_init(struct qmp_pcie *qmp) { - struct qmp_pcie *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_clks; int i; @@ -2164,18 +2167,15 @@ static const struct phy_ops qmp_pcie_ops = { .owner = THIS_MODULE, }; -static int qmp_pcie_create(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) { - struct qmp_pcie *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; struct phy *generic_phy; int ret; qmp->mode = PHY_MODE_PCIE_RC; - qmp->cfg = cfg; - qmp->serdes = serdes; - /* * Get memory resources for the PHY: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. 
@@ -2247,8 +2247,6 @@ static int qmp_pcie_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; - void __iomem *serdes; - const struct qmp_phy_cfg *cfg = NULL; struct qmp_pcie *qmp; int ret; @@ -2257,28 +2255,27 @@ static int qmp_pcie_probe(struct platform_device *pdev) return -ENOMEM; qmp->dev = dev; - dev_set_drvdata(dev, qmp); - cfg = of_device_get_match_data(dev); - if (!cfg) + qmp->cfg = of_device_get_match_data(dev); + if (!qmp->cfg) return -EINVAL; - WARN_ON_ONCE(!cfg->pwrdn_ctrl); - WARN_ON_ONCE(!cfg->phy_status); + WARN_ON_ONCE(!qmp->cfg->pwrdn_ctrl); + WARN_ON_ONCE(!qmp->cfg->phy_status); - serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(serdes)) - return PTR_ERR(serdes); + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); - ret = qmp_pcie_clk_init(dev, cfg); + ret = qmp_pcie_clk_init(qmp); if (ret) return ret; - ret = qmp_pcie_reset_init(dev, cfg); + ret = qmp_pcie_reset_init(qmp); if (ret) return ret; - ret = qmp_pcie_vreg_init(dev, cfg); + ret = qmp_pcie_vreg_init(qmp); if (ret) return ret; @@ -2286,7 +2283,7 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (!child) return -EINVAL; - ret = qmp_pcie_create(dev, child, serdes, cfg); + ret = qmp_pcie_create(qmp, child); if (ret) goto err_node_put; -- GitLab From 63bf101ae1912570260912087bfd7b1cf420c3dc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:29 +0100 Subject: [PATCH 160/694] phy: qcom-qmp-pcie: rename PHY ops structure Rename the PHY operation structure so that it has a "phy_ops" suffix and move it next to the implementation. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index e30cbc94cbf6b..bd946438e3c37 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2037,6 +2037,13 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } +static const struct phy_ops qmp_pcie_phy_ops = { + .power_on = qmp_pcie_enable, + .power_off = qmp_pcie_disable, + .set_mode = qmp_pcie_set_mode, + .owner = THIS_MODULE, +}; + static int qmp_pcie_vreg_init(struct qmp_pcie *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -2160,13 +2167,6 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static const struct phy_ops qmp_pcie_ops = { - .power_on = qmp_pcie_enable, - .power_off = qmp_pcie_disable, - .set_mode = qmp_pcie_set_mode, - .owner = THIS_MODULE, -}; - static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -2229,7 +2229,7 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_pcie_ops); + generic_phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); dev_err(dev, "failed to create PHY: %d\n", ret); -- GitLab From f8b641146484b6be33c291fa8279ffe423e169d9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 
15:59:30 +0100 Subject: [PATCH 161/694] phy: qcom-qmp-pcie: clean up PHY lane init Clean up the PHY lane initialisation somewhat by adding further temporary variables and programming both tx and rx for the second lane after the first lane. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index bd946438e3c37..dd7e72424fc04 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1835,18 +1835,19 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t const struct qmp_phy_cfg *cfg = qmp->cfg; void __iomem *tx = qmp->tx; void __iomem *rx = qmp->rx; + void __iomem *tx2 = qmp->tx2; + void __iomem *rx2 = qmp->rx2; if (!tables) return; qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1); - - if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qmp->tx2, tables->tx, tables->tx_num, 2); - qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1); - if (cfg->lanes >= 2) - qmp_pcie_configure_lane(qmp->rx2, tables->rx, tables->rx_num, 2); + + if (cfg->lanes >= 2) { + qmp_pcie_configure_lane(tx2, tables->tx, tables->tx_num, 2); + qmp_pcie_configure_lane(rx2, tables->rx, tables->rx_num, 2); + } } static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) -- GitLab From d8c9a1e9c223951836692eb8c3810e18ae06ffc2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:31 +0100 Subject: [PATCH 162/694] phy: qcom-qmp-pcie: use shorter tables identifiers The QMP drivers all use 'tbl' to refer to their register initialisation tables. For consistency use 'tbls' rather than 'tables' to refer to the new aggregate table structures. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221105145939.20318-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 90 ++++++++++++------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index dd7e72424fc04..a977f2bbd8317 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1313,7 +1313,7 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] = QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08), }; -struct qmp_phy_cfg_tables { +struct qmp_phy_cfg_tbls { const struct qmp_phy_init_tbl *serdes; int serdes_num; const struct qmp_phy_init_tbl *tx; @@ -1331,7 +1331,7 @@ struct qmp_phy_cfg { int lanes; /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */ - const struct qmp_phy_cfg_tables tables; + const struct qmp_phy_cfg_tbls tbls; /* * Additional init sequences for PHY blocks, providing additional * register programming. They are used for providing separate sequences @@ -1339,8 +1339,8 @@ struct qmp_phy_cfg { * * If EP mode is not supported, both tables can be left unset. 
*/ - const struct qmp_phy_cfg_tables *tables_rc; - const struct qmp_phy_cfg_tables *tables_ep; + const struct qmp_phy_cfg_tbls *tbls_rc; + const struct qmp_phy_cfg_tbls *tbls_ep; /* clock ids to be requested */ const char * const *clk_list; @@ -1442,7 +1442,7 @@ static const char * const sdm845_pciephy_reset_l[] = { static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = ipq8074_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(ipq8074_pcie_serdes_tbl), .tx = ipq8074_pcie_tx_tbl, @@ -1467,7 +1467,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = ipq8074_pcie_gen3_serdes_tbl, .serdes_num = ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl), .tx = ipq8074_pcie_gen3_tx_tbl, @@ -1494,7 +1494,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = { static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = ipq6018_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(ipq6018_pcie_serdes_tbl), .tx = ipq6018_pcie_tx_tbl, @@ -1521,7 +1521,7 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = { static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sdm845_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl), .tx = sdm845_qmp_pcie_tx_tbl, @@ -1548,7 +1548,7 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = { static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sdm845_qhp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl), .tx = sdm845_qhp_pcie_tx_tbl, @@ -1573,7 +1573,7 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = { static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sm8250_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), .tx = sm8250_qmp_pcie_tx_tbl, @@ -1585,7 +1585,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { .pcs_misc = sm8250_qmp_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), }, - .tables_rc = &(const struct qmp_phy_cfg_tables) { + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { .serdes = sm8250_qmp_gen3x1_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl), .rx = sm8250_qmp_gen3x1_pcie_rx_tbl, @@ -1610,7 +1610,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = { static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .lanes = 2, - .tables = { + .tbls = { .serdes = sm8250_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), .tx = sm8250_qmp_pcie_tx_tbl, @@ -1622,7 +1622,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { .pcs_misc = sm8250_qmp_pcie_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl), }, - .tables_rc = &(const struct qmp_phy_cfg_tables) { + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { .tx = sm8250_qmp_gen3x2_pcie_tx_tbl, .tx_num = ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl), .rx = sm8250_qmp_gen3x2_pcie_rx_tbl, @@ -1647,7 +1647,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = { static const struct qmp_phy_cfg msm8998_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = msm8998_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(msm8998_pcie_serdes_tbl), .tx = msm8998_pcie_tx_tbl, @@ -1674,7 +1674,7 @@ static const struct qmp_phy_cfg 
msm8998_pciephy_cfg = { static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sc8180x_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl), .tx = sc8180x_qmp_pcie_tx_tbl, @@ -1701,7 +1701,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .lanes = 2, - .tables = { + .tbls = { .serdes = sdx55_qmp_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl), .tx = sdx55_qmp_pcie_tx_tbl, @@ -1728,7 +1728,7 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { .lanes = 1, - .tables = { + .tbls = { .serdes = sm8450_qmp_gen3x1_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl), .tx = sm8450_qmp_gen3x1_pcie_tx_tbl, @@ -1755,7 +1755,7 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = { static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .lanes = 2, - .tables = { + .tbls = { .serdes = sm8450_qmp_gen4x2_pcie_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl), .tx = sm8450_qmp_gen4x2_pcie_tx_tbl, @@ -1768,14 +1768,14 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = { .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl), }, - .tables_rc = &(const struct qmp_phy_cfg_tables) { + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { .serdes = sm8450_qmp_gen4x2_pcie_rc_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_serdes_tbl), .pcs_misc = sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl, .pcs_misc_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl), }, - .tables_ep = &(const struct qmp_phy_cfg_tables) { + .tbls_ep = &(const struct qmp_phy_cfg_tbls) { .serdes = sm8450_qmp_gen4x2_pcie_ep_serdes_tbl, .serdes_num = ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_serdes_tbl), .pcs_misc = sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl, @@ -1820,17 +1820,17 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, tbl, num, 0xff); } -static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { void __iomem *serdes = qmp->serdes; - if (!tables) + if (!tbls) return; - qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num); + qmp_pcie_configure(serdes, tbls->serdes, tbls->serdes_num); } -static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) +static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { const struct qmp_phy_cfg *cfg = qmp->cfg; void __iomem *tx = qmp->tx; @@ -1838,28 +1838,28 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t void __iomem *tx2 = qmp->tx2; void __iomem *rx2 = qmp->rx2; - if (!tables) + if (!tbls) return; - qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1); - qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1); + qmp_pcie_configure_lane(tx, tbls->tx, tbls->tx_num, 1); + qmp_pcie_configure_lane(rx, tbls->rx, tbls->rx_num, 1); if (cfg->lanes >= 2) { - qmp_pcie_configure_lane(tx2, tables->tx, tables->tx_num, 2); - qmp_pcie_configure_lane(rx2, tables->rx, tables->rx_num, 2); + qmp_pcie_configure_lane(tx2, tbls->tx, tbls->tx_num, 2); + qmp_pcie_configure_lane(rx2, tbls->rx, tbls->rx_num, 2); } } -static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables) +static void 
qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { void __iomem *pcs = qmp->pcs; void __iomem *pcs_misc = qmp->pcs_misc; - if (!tables) + if (!tbls) return; - qmp_pcie_configure(pcs, tables->pcs, tables->pcs_num); - qmp_pcie_configure(pcs_misc, tables->pcs_misc, tables->pcs_misc_num); + qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num); + qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num); } static int qmp_pcie_init(struct phy *phy) @@ -1918,7 +1918,7 @@ static int qmp_pcie_power_on(struct phy *phy) { struct qmp_pcie *qmp = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qmp->cfg; - const struct qmp_phy_cfg_tables *mode_tables; + const struct qmp_phy_cfg_tbls *mode_tbls; void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int mask, val; @@ -1928,12 +1928,12 @@ static int qmp_pcie_power_on(struct phy *phy) cfg->pwrdn_ctrl); if (qmp->mode == PHY_MODE_PCIE_RC) - mode_tables = cfg->tables_rc; + mode_tbls = cfg->tbls_rc; else - mode_tables = cfg->tables_ep; + mode_tbls = cfg->tbls_ep; - qmp_pcie_serdes_init(qmp, &cfg->tables); - qmp_pcie_serdes_init(qmp, mode_tables); + qmp_pcie_serdes_init(qmp, &cfg->tbls); + qmp_pcie_serdes_init(qmp, mode_tbls); ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { @@ -1942,11 +1942,11 @@ static int qmp_pcie_power_on(struct phy *phy) } /* Tx, Rx, and PCS configurations */ - qmp_pcie_lanes_init(qmp, &cfg->tables); - qmp_pcie_lanes_init(qmp, mode_tables); + qmp_pcie_lanes_init(qmp, &cfg->tbls); + qmp_pcie_lanes_init(qmp, mode_tbls); - qmp_pcie_pcs_init(qmp, &cfg->tables); - qmp_pcie_pcs_init(qmp, mode_tables); + qmp_pcie_pcs_init(qmp, &cfg->tbls); + qmp_pcie_pcs_init(qmp, mode_tbls); /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); @@ -2217,9 +2217,9 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) qmp->pcs_misc = qmp->pcs + 0x400; if (IS_ERR(qmp->pcs_misc)) { - if (cfg->tables.pcs_misc || - (cfg->tables_rc && cfg->tables_rc->pcs_misc) || - (cfg->tables_ep && cfg->tables_ep->pcs_misc)) { + if (cfg->tbls.pcs_misc || + (cfg->tbls_rc && cfg->tbls_rc->pcs_misc) || + (cfg->tbls_ep && cfg->tbls_ep->pcs_misc)) { return PTR_ERR(qmp->pcs_misc); } } -- GitLab From ec7bc1b40b363c46af0b17d4cb0ea4dc21b7cf9b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:32 +0100 Subject: [PATCH 163/694] phy: qcom-qmp-pcie: add register init helper Generalise the serdes initialisation helper so that it can be used to initialise all the PHY registers (e.g. serdes, tx, rx, pcs). Note that this defers the ungating of the PIPE clock somewhat, which is fine as it isn't needed until starting the PHY. 
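As a rough sketch, the resulting qmp_pcie_power_on() flow condenses to the following (lines taken from the hunk below; the later start and status-polling steps are not shown):

	/* Program serdes, tx/rx and PCS from the common and mode-specific tables. */
	qmp_pcie_init_registers(qmp, &cfg->tbls);
	qmp_pcie_init_registers(qmp, mode_tbls);

	/* The PIPE clock is only needed from this point on. */
	ret = clk_prepare_enable(qmp->pipe_clk);
	if (ret) {
		dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret);
		return ret;
	}

	/* Pull PHY out of reset state */
	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);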
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 37 +++++------------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index a977f2bbd8317..09999d5b52689 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1820,27 +1820,22 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, tbl, num, 0xff); } -static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) -{ - void __iomem *serdes = qmp->serdes; - - if (!tbls) - return; - - qmp_pcie_configure(serdes, tbls->serdes, tbls->serdes_num); -} - -static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) +static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->serdes; void __iomem *tx = qmp->tx; void __iomem *rx = qmp->rx; void __iomem *tx2 = qmp->tx2; void __iomem *rx2 = qmp->rx2; + void __iomem *pcs = qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; if (!tbls) return; + qmp_pcie_configure(serdes, tbls->serdes, tbls->serdes_num); + qmp_pcie_configure_lane(tx, tbls->tx, tbls->tx_num, 1); qmp_pcie_configure_lane(rx, tbls->rx, tbls->rx_num, 1); @@ -1848,15 +1843,6 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t qmp_pcie_configure_lane(tx2, tbls->tx, tbls->tx_num, 2); qmp_pcie_configure_lane(rx2, tbls->rx, tbls->rx_num, 2); } -} - -static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) -{ - void __iomem *pcs = qmp->pcs; - void __iomem *pcs_misc = qmp->pcs_misc; - - if (!tbls) - return; qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num); qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num); @@ -1932,8 +1918,8 @@ static int qmp_pcie_power_on(struct phy *phy) else mode_tbls = cfg->tbls_ep; - qmp_pcie_serdes_init(qmp, &cfg->tbls); - qmp_pcie_serdes_init(qmp, mode_tbls); + qmp_pcie_init_registers(qmp, &cfg->tbls); + qmp_pcie_init_registers(qmp, mode_tbls); ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { @@ -1941,13 +1927,6 @@ static int qmp_pcie_power_on(struct phy *phy) return ret; } - /* Tx, Rx, and PCS configurations */ - qmp_pcie_lanes_init(qmp, &cfg->tbls); - qmp_pcie_lanes_init(qmp, mode_tbls); - - qmp_pcie_pcs_init(qmp, &cfg->tbls); - qmp_pcie_pcs_init(qmp, mode_tbls); - /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); -- GitLab From dcb93f47dd14cb0c206424b3258399b4cd205b20 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:33 +0100 Subject: [PATCH 164/694] dt-bindings: phy: qcom,qmp-pcie: rename current bindings The current QMP PCIe PHY bindings are based on the original MSM8996 binding which provided multiple PHYs per IP block and these in turn were described by child nodes. Later QMP PCIe PHY blocks only provide a single PHY and the remnant child node does not really reflect the hardware. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. 
This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. they do not include the per-lane PCS registers). In preparation for adding new bindings for SC8280XP, which further bindings can be based on, rename the current schema file after IPQ8074, which was the first SoC added to the bindings after MSM8996 (which has already been split out), and add a reference to the SC8280XP bindings. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- ...om,qmp-pcie-phy.yaml => qcom,ipq8074-qmp-pcie-phy.yaml} | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename Documentation/devicetree/bindings/phy/{qcom,qmp-pcie-phy.yaml => qcom,ipq8074-qmp-pcie-phy.yaml} (96%) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml similarity index 96% rename from Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml rename to Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml index 324ad7d03a383..62045dcfb20c5 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml @@ -1,10 +1,10 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/phy/qcom,qmp-pcie-phy.yaml# +$id: http://devicetree.org/schemas/phy/qcom,ipq8074-qmp-pcie-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm QMP PHY controller (PCIe) +title: Qualcomm QMP PHY controller (PCIe, IPQ8074) maintainers: - Vinod Koul @@ -13,6 +13,9 @@ description: QMP PHY controller supports physical layer functionality for a number of controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see + qcom,sc8280xp-qmp-pcie-phy.yaml. + properties: compatible: enum: -- GitLab From 306382305c5cefce892784da8686c242956f5fd2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:34 +0100 Subject: [PATCH 165/694] dt-bindings: phy: qcom,qmp-pcie: add sc8280xp bindings Add bindings for the PCIe QMP PHYs found on SC8280XP. The PCIe2 and PCIe3 controllers and PHYs on SC8280XP can be used in 4-lane mode or as separate controllers and PHYs in 2-lane mode (e.g. as PCIe2A and PCIe2B). The configuration for a specific system can be read from a TCSR register. 
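For illustration, the 4-lane case from the example added below condenses to the following (only the properties relevant to the lane configuration are shown):

	pcie2a_phy: phy@1c24000 {
		compatible = "qcom,sc8280xp-qmp-gen3x4-pcie-phy";
		/* "port a" and "port b" register blocks */
		reg = <0x01c24000 0x2000>, <0x01c26000 0x2000>;

		/* TCSR syscon, 4-lane configuration register offset, bit for this PHY */
		qcom,4ln-config-sel = <&tcsr 0xa044 0>;

		#phy-cells = <0>;
	};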
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qcom,sc8280xp-qmp-pcie-phy.yaml | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml new file mode 100644 index 0000000000000..80aa8d2507fb6 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -0,0 +1,165 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-pcie-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm QMP PHY controller (PCIe, SC8280XP) + +maintainers: + - Vinod Koul + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + +properties: + compatible: + enum: + - qcom,sc8280xp-qmp-gen3x1-pcie-phy + - qcom,sc8280xp-qmp-gen3x2-pcie-phy + - qcom,sc8280xp-qmp-gen3x4-pcie-phy + + reg: + minItems: 1 + maxItems: 2 + + clocks: + maxItems: 6 + + clock-names: + items: + - const: aux + - const: cfg_ahb + - const: ref + - const: rchng + - const: pipe + - const: pipediv2 + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + reset-names: + items: + - const: phy + + vdda-phy-supply: true + + vdda-pll-supply: true + + qcom,4ln-config-sel: + description: PCIe 4-lane configuration + $ref: /schemas/types.yaml#/definitions/phandle-array + items: + - items: + - description: phandle of TCSR syscon + - description: offset of PCIe 4-lane configuration register + - description: offset of configuration bit for this PHY + + "#clock-cells": + const: 0 + + clock-output-names: + maxItems: 1 + + "#phy-cells": + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + - resets + - reset-names + - vdda-phy-supply + - vdda-pll-supply + - "#clock-cells" + - clock-output-names + - "#phy-cells" + +additionalProperties: false + +allOf: + - if: + properties: + compatible: + contains: + enum: + - qcom,sc8280xp-qmp-gen3x4-pcie-phy + then: + properties: + reg: + items: + - description: port a + - description: port b + required: + - qcom,4ln-config-sel + else: + properties: + reg: + maxItems: 1 + +examples: + - | + #include + + pcie2b_phy: phy@1c18000 { + compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy"; + reg = <0x01c18000 0x2000>; + + clocks = <&gcc GCC_PCIE_2B_AUX_CLK>, + <&gcc GCC_PCIE_2B_CFG_AHB_CLK>, + <&gcc GCC_PCIE_2A2B_CLKREF_CLK>, + <&gcc GCC_PCIE2B_PHY_RCHNG_CLK>, + <&gcc GCC_PCIE_2B_PIPE_CLK>, + <&gcc GCC_PCIE_2B_PIPEDIV2_CLK>; + clock-names = "aux", "cfg_ahb", "ref", "rchng", + "pipe", "pipediv2"; + + power-domains = <&gcc PCIE_2B_GDSC>; + + resets = <&gcc GCC_PCIE_2B_PHY_BCR>; + reset-names = "phy"; + + vdda-phy-supply = <&vreg_l6d>; + vdda-pll-supply = <&vreg_l4d>; + + #clock-cells = <0>; + clock-output-names = "pcie_2b_pipe_clk"; + + #phy-cells = <0>; + }; + + pcie2a_phy: phy@1c24000 { + compatible = "qcom,sc8280xp-qmp-gen3x4-pcie-phy"; + reg = <0x01c24000 0x2000>, <0x01c26000 0x2000>; + + clocks = <&gcc GCC_PCIE_2A_AUX_CLK>, + <&gcc GCC_PCIE_2A_CFG_AHB_CLK>, + <&gcc GCC_PCIE_2A2B_CLKREF_CLK>, + <&gcc GCC_PCIE2A_PHY_RCHNG_CLK>, + <&gcc GCC_PCIE_2A_PIPE_CLK>, + <&gcc 
GCC_PCIE_2A_PIPEDIV2_CLK>; + clock-names = "aux", "cfg_ahb", "ref", "rchng", + "pipe", "pipediv2"; + + power-domains = <&gcc PCIE_2A_GDSC>; + + resets = <&gcc GCC_PCIE_2A_PHY_BCR>; + reset-names = "phy"; + + vdda-phy-supply = <&vreg_l6d>; + vdda-pll-supply = <&vreg_l4d>; + + qcom,4ln-config-sel = <&tcsr 0xa044 0>; + + #clock-cells = <0>; + clock-output-names = "pcie_2a_pipe_clk"; + + #phy-cells = <0>; + }; -- GitLab From 7bc609e34899dd3065fc9cbc73bb8a4902e257df Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:35 +0100 Subject: [PATCH 166/694] phy: qcom-qmp-pcie: restructure PHY creation In preparation for supporting devicetree bindings which do not use a child node, move the PHY creation to probe() proper and parse the serdes resource in what is now the legacy devicetree helper. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 36 +++++++++++------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 09999d5b52689..8af84ff755ab1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -2147,14 +2147,15 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) +static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np) { + struct platform_device *pdev = to_platform_device(qmp->dev); const struct qmp_phy_cfg *cfg = qmp->cfg; struct device *dev = qmp->dev; - struct phy *generic_phy; - int ret; - qmp->mode = PHY_MODE_PCIE_RC; + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); /* * Get memory resources for the PHY: @@ -2209,16 +2210,6 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create PHY: %d\n", ret); - return ret; - } - - qmp->phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); - return 0; } @@ -2243,10 +2234,6 @@ static int qmp_pcie_probe(struct platform_device *pdev) WARN_ON_ONCE(!qmp->cfg->pwrdn_ctrl); WARN_ON_ONCE(!qmp->cfg->phy_status); - qmp->serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qmp->serdes)) - return PTR_ERR(qmp->serdes); - ret = qmp_pcie_clk_init(qmp); if (ret) return ret; @@ -2263,7 +2250,7 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (!child) return -EINVAL; - ret = qmp_pcie_create(qmp, child); + ret = qmp_pcie_parse_dt_legacy(qmp, child); if (ret) goto err_node_put; @@ -2271,6 +2258,17 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (ret) goto err_node_put; + qmp->mode = PHY_MODE_PCIE_RC; + + qmp->phy = devm_phy_create(dev, child, &qmp_pcie_phy_ops); + if (IS_ERR(qmp->phy)) { + ret = PTR_ERR(qmp->phy); + dev_err(dev, "failed to create PHY: %d\n", ret); + goto err_node_put; + } + + phy_set_drvdata(qmp->phy, qmp); + of_node_put(child); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); -- GitLab From fffdeaf853d8088c5149fc776974344b0f815dc8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 
15:59:36 +0100 Subject: [PATCH 167/694] phy: qcom-qmp-pcie: fix initialisation reset Add the missing delay after asserting reset. This is specifically needed for the reset to have any effect on SC8280XP. The vendor driver uses a 1 ms delay, but that seems a bit excessive. Instead use a 200 us delay which appears to be more than enough and also matches the UFS reset delay added by commit 870b1279c7a0 ("scsi: ufs-qcom: Add reset control support for host controller"). Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 8af84ff755ab1..06844552922ee 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1866,6 +1866,8 @@ static int qmp_pcie_init(struct phy *phy) goto err_disable_regulators; } + usleep_range(200, 300); + ret = reset_control_bulk_deassert(cfg->num_resets, qmp->resets); if (ret) { dev_err(qmp->dev, "reset deassert failed\n"); -- GitLab From 9e420f1e7eddbbb6b73a78aca2c280ddd8a63096 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:37 +0100 Subject: [PATCH 168/694] phy: qcom-qmp-pcie: add support for pipediv2 clock Some QMP PHYs have a second fixed-divider pipe clock that needs to be enabled along with the pipe clock. Add support for an optional "pipediv2" clock. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-15-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 25 ++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 06844552922ee..d671b05c73dde 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1378,8 +1378,10 @@ struct qmp_pcie { void __iomem *tx2; void __iomem *rx2; - struct clk *pipe_clk; struct clk_bulk_data *clks; + struct clk_bulk_data pipe_clks[2]; + int num_pipe_clks; + struct reset_control_bulk_data *resets; struct regulator_bulk_data *vregs; @@ -1923,11 +1925,9 @@ static int qmp_pcie_power_on(struct phy *phy) qmp_pcie_init_registers(qmp, &cfg->tbls); qmp_pcie_init_registers(qmp, mode_tbls); - ret = clk_prepare_enable(qmp->pipe_clk); - if (ret) { - dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret); + ret = clk_bulk_prepare_enable(qmp->num_pipe_clks, qmp->pipe_clks); + if (ret) return ret; - } /* Pull PHY out of reset state */ qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); @@ -1950,7 +1950,7 @@ static int qmp_pcie_power_on(struct phy *phy) return 0; err_disable_pipe_clk: - clk_disable_unprepare(qmp->pipe_clk); + clk_bulk_disable_unprepare(qmp->num_pipe_clks, qmp->pipe_clks); return ret; } @@ -1960,7 +1960,7 @@ static int qmp_pcie_power_off(struct phy *phy) struct qmp_pcie *qmp = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qmp->cfg; - clk_disable_unprepare(qmp->pipe_clk); + clk_bulk_disable_unprepare(qmp->num_pipe_clks, qmp->pipe_clks); /* PHY reset */ qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); @@ -2154,6 +2154,7 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np struct platform_device *pdev = to_platform_device(qmp->dev); const struct qmp_phy_cfg *cfg 
= qmp->cfg; struct device *dev = qmp->dev; + struct clk *clk; qmp->serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(qmp->serdes)) @@ -2206,12 +2207,16 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np } } - qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL); - if (IS_ERR(qmp->pipe_clk)) { - return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + clk = devm_get_clk_from_child(dev, np, NULL); + if (IS_ERR(clk)) { + return dev_err_probe(dev, PTR_ERR(clk), "failed to get pipe clock\n"); } + qmp->num_pipe_clks = 1; + qmp->pipe_clks[0].id = "pipe"; + qmp->pipe_clks[0].clk = clk; + return 0; } -- GitLab From d0a846ba28ddb589fc5a5741ae566e19c1034034 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:38 +0100 Subject: [PATCH 169/694] phy: qcom-qmp-pcie: add support for sc8280xp Add support for the single and dual-lane PHYs found on SC8280XP. Note that the SC8280XP binding does not try to describe every register subregion and instead the driver holds the corresponding offsets. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-16-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 299 +++++++++++++++++- .../phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h | 2 + 2 files changed, 291 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index d671b05c73dde..f507a67a8361b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -834,6 +834,143 @@ static const struct qmp_phy_init_tbl sc8180x_qmp_pcie_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), }; +static const struct qmp_phy_init_tbl sc8280xp_qmp_pcie_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_EN_CENTER, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER2, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0, 0xde), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1, 0x4c), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_ENABLE1, 0x90), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x42), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1a), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x82), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x68), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE0, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE1, 0xab), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE1, 0xaa), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE1, 0x02), 
+ QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE1_MODE0, 0x24), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE1_MODE1, 0xb4), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE2_MODE1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_SELECT, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xb9), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0x94), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_BUF_ENABLE, 0x07), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x20), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x75), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x1d), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0c), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xbf), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xd8), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa6), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_TX_ADAPT_POST_THRESH, 0xf0), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf0), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x05), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x77), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_RATE_SLEW_CNTRL1, 0x0b), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_INT_AUX_CLK_CONFIG1, 0x00), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_EQ_CONFIG2, 0x0f), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_tx_tbl[] = { + QMP_PHY_INIT_CFG_LANE(QSERDES_V5_TX_PI_QEC_CTRL, 0x02, 1), + QMP_PHY_INIT_CFG_LANE(QSERDES_V5_TX_PI_QEC_CTRL, 0x04, 2), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0xd5), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x11), + QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0c), +}; + 
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0x7f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xd8), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xdc), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0x5c), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa6), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf0), + QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x05), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x88), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_RATE_SLEW_CNTRL1, 0x0b), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG3, 0x0f), +}; + +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG2, 0x1d), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG4, 0x07), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), +}; + static const struct qmp_phy_init_tbl sm8250_qmp_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V4_COM_SYSCLK_EN_SEL, 0x08), QMP_PHY_INIT_CFG(QSERDES_V4_COM_CLK_SELECT, 0x34), @@ -1313,6 +1450,16 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] = QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08), }; +struct qmp_pcie_offsets { + u16 serdes; + u16 pcs; + u16 pcs_misc; + u16 tx; + u16 rx; + u16 tx2; + u16 rx2; +}; + struct qmp_phy_cfg_tbls { const struct qmp_phy_init_tbl *serdes; int serdes_num; @@ -1330,6 +1477,8 @@ struct qmp_phy_cfg_tbls { struct qmp_phy_cfg { int lanes; + const struct qmp_pcie_offsets *offsets; + /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_cfg_tbls tbls; /* @@ -1422,6 +1571,9 @@ static const char * const msm8996_phy_clk_l[] = { "aux", "cfg_ahb", "ref", }; +static const char * const sc8280xp_pciephy_clk_l[] = { + "aux", "cfg_ahb", "ref", "rchng", +}; static const char * const sdm845_pciephy_clk_l[] = { "aux", "cfg_ahb", "ref", "refgen", @@ -1441,6 +1593,16 @@ static const char * const sdm845_pciephy_reset_l[] = { "phy", }; +static const struct qmp_pcie_offsets qmp_pcie_offsets_v5 = { + .serdes = 0, + .pcs = 0x0200, + .pcs_misc = 0x0600, + .tx = 0x0e00, + .rx = 0x1000, + .tx2 = 0x1600, + .rx2 = 0x1800, +}; + static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .lanes = 1, @@ -1700,6 +1862,76 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .phy_status = PHYSTATUS, }; +static const struct qmp_phy_cfg sc8280xp_qmp_gen3x1_pciephy_cfg = { + .lanes = 1, + + .offsets = &qmp_pcie_offsets_v5, + + .tbls = { + .serdes = sc8280xp_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl), + .tx = sc8280xp_qmp_gen3x1_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_tx_tbl), + .rx = 
sc8280xp_qmp_gen3x1_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_rx_tbl), + .pcs = sc8280xp_qmp_gen3x1_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_pcs_tbl), + .pcs_misc = sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl), + }, + + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { + .serdes = sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl), + }, + + .clk_list = sc8280xp_pciephy_clk_l, + .num_clks = ARRAY_SIZE(sc8280xp_pciephy_clk_l), + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = sm8250_pcie_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, +}; + +static const struct qmp_phy_cfg sc8280xp_qmp_gen3x2_pciephy_cfg = { + .lanes = 2, + + .offsets = &qmp_pcie_offsets_v5, + + .tbls = { + .serdes = sc8280xp_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl), + .tx = sc8280xp_qmp_gen3x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_tx_tbl), + .rx = sc8280xp_qmp_gen3x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rx_tbl), + .pcs = sc8280xp_qmp_gen3x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_tbl), + .pcs_misc = sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl), + }, + + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { + .serdes = sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl), + }, + + .clk_list = sc8280xp_pciephy_clk_l, + .num_clks = ARRAY_SIZE(sc8280xp_pciephy_clk_l), + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = sm8250_pcie_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, +}; + static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .lanes = 2, @@ -2220,11 +2452,49 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np return 0; } +static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_pcie_offsets *offs = cfg->offsets; + struct device *dev = qmp->dev; + void __iomem *base; + int ret; + + if (!offs) + return -EINVAL; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + qmp->serdes = base + offs->serdes; + qmp->pcs = base + offs->pcs; + qmp->pcs_misc = base + offs->pcs_misc; + qmp->tx = base + offs->tx; + qmp->rx = base + offs->rx; + + if (cfg->lanes >= 2) { + qmp->tx2 = base + offs->tx2; + qmp->rx2 = base + offs->rx2; + } + + qmp->num_pipe_clks = 2; + qmp->pipe_clks[0].id = "pipe"; + qmp->pipe_clks[1].id = "pipediv2"; + + ret = devm_clk_bulk_get(dev, qmp->num_pipe_clks, qmp->pipe_clks); + if (ret) + return ret; + + return 0; +} + static int qmp_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *child; struct phy_provider *phy_provider; + struct device_node *np; struct qmp_pcie *qmp; int ret; @@ -2253,21 +2523,24 @@ static int qmp_pcie_probe(struct platform_device *pdev) if (ret) return ret; - child = of_get_next_available_child(dev->of_node, NULL); - if (!child) - return -EINVAL; - - ret = 
qmp_pcie_parse_dt_legacy(qmp, child); + /* Check for legacy binding with child node. */ + np = of_get_next_available_child(dev->of_node, NULL); + if (np) { + ret = qmp_pcie_parse_dt_legacy(qmp, np); + } else { + np = of_node_get(dev->of_node); + ret = qmp_pcie_parse_dt(qmp); + } if (ret) goto err_node_put; - ret = phy_pipe_clk_register(qmp, child); + ret = phy_pipe_clk_register(qmp, np); if (ret) goto err_node_put; qmp->mode = PHY_MODE_PCIE_RC; - qmp->phy = devm_phy_create(dev, child, &qmp_pcie_phy_ops); + qmp->phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops); if (IS_ERR(qmp->phy)) { ret = PTR_ERR(qmp->phy); dev_err(dev, "failed to create PHY: %d\n", ret); @@ -2276,14 +2549,14 @@ static int qmp_pcie_probe(struct platform_device *pdev) phy_set_drvdata(qmp->phy, qmp); - of_node_put(child); + of_node_put(np); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); err_node_put: - of_node_put(child); + of_node_put(np); return ret; } @@ -2303,6 +2576,12 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sc8180x-qmp-pcie-phy", .data = &sc8180x_pciephy_cfg, + }, { + .compatible = "qcom,sc8280xp-qmp-gen3x1-pcie-phy", + .data = &sc8280xp_qmp_gen3x1_pciephy_cfg, + }, { + .compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy", + .data = &sc8280xp_qmp_gen3x2_pciephy_cfg, }, { .compatible = "qcom,sdm845-qhp-pcie-phy", .data = &sdm845_qhp_pciephy_cfg, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h index 2e19fb3f051e0..a469ae2a10a13 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h @@ -8,6 +8,8 @@ #define QCOM_PHY_QMP_PCS_PCIE_V5_H_ /* Only for QMP V5 PHY - PCS_PCIE registers */ +#define QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG2 0x0c +#define QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG4 0x14 #define QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE 0x20 #define QPHY_V5_PCS_PCIE_INT_AUX_CLK_CONFIG1 0x54 #define QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS 0x94 -- GitLab From 6c37a02b25180350ed7bd199c074a79fe6d16e51 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 5 Nov 2022 15:59:39 +0100 Subject: [PATCH 170/694] phy: qcom-qmp-pcie: add support for sc8280xp 4-lane PHYs The PCIe2 and PCIe3 controllers and PHYs on SC8280XP can be used in 4-lane mode or as separate controllers and PHYs in 2-lane mode (e.g. as PCIe2A and PCIe2B). Add support for fetching the 4-lane configuration from the TCSR and programming the lane registers of the second port when in 4-lane mode. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221105145939.20318-17-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/Kconfig | 1 + drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 117 +++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig index 5c98850f5a360..eb9ddc685b385 100644 --- a/drivers/phy/qualcomm/Kconfig +++ b/drivers/phy/qualcomm/Kconfig @@ -54,6 +54,7 @@ config PHY_QCOM_QMP tristate "Qualcomm QMP PHY Driver" depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST) select GENERIC_PHY + select MFD_SYSCON help Enable this to support the QMP PHY transceiver that is used with controllers such as PCIe, UFS, and USB on Qualcomm chips. 
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index f507a67a8361b..111716e25b173 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -886,6 +888,10 @@ static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl[] = QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14), }; +static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x1c), +}; + static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_tx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x20), QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x75), @@ -1491,6 +1497,9 @@ struct qmp_phy_cfg { const struct qmp_phy_cfg_tbls *tbls_rc; const struct qmp_phy_cfg_tbls *tbls_ep; + const struct qmp_phy_init_tbl *serdes_4ln_tbl; + int serdes_4ln_num; + /* clock ids to be requested */ const char * const *clk_list; int num_clks; @@ -1518,6 +1527,7 @@ struct qmp_pcie { struct device *dev; const struct qmp_phy_cfg *cfg; + bool tcsr_4ln_config; void __iomem *serdes; void __iomem *pcs; @@ -1527,6 +1537,8 @@ struct qmp_pcie { void __iomem *tx2; void __iomem *rx2; + void __iomem *port_b; + struct clk_bulk_data *clks; struct clk_bulk_data pipe_clks[2]; int num_pipe_clks; @@ -1932,6 +1944,44 @@ static const struct qmp_phy_cfg sc8280xp_qmp_gen3x2_pciephy_cfg = { .phy_status = PHYSTATUS, }; +static const struct qmp_phy_cfg sc8280xp_qmp_gen3x4_pciephy_cfg = { + .lanes = 4, + + .offsets = &qmp_pcie_offsets_v5, + + .tbls = { + .serdes = sc8280xp_qmp_pcie_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl), + .tx = sc8280xp_qmp_gen3x2_pcie_tx_tbl, + .tx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_tx_tbl), + .rx = sc8280xp_qmp_gen3x2_pcie_rx_tbl, + .rx_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rx_tbl), + .pcs = sc8280xp_qmp_gen3x2_pcie_pcs_tbl, + .pcs_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_tbl), + .pcs_misc = sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl, + .pcs_misc_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl), + }, + + .tbls_rc = &(const struct qmp_phy_cfg_tbls) { + .serdes = sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl, + .serdes_num = ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl), + }, + + .serdes_4ln_tbl = sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl, + .serdes_4ln_num = ARRAY_SIZE(sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl), + + .clk_list = sc8280xp_pciephy_clk_l, + .num_clks = ARRAY_SIZE(sc8280xp_pciephy_clk_l), + .reset_list = sdm845_pciephy_reset_l, + .num_resets = ARRAY_SIZE(sdm845_pciephy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = sm8250_pcie_regs_layout, + + .pwrdn_ctrl = SW_PWRDN | REFCLK_DRV_DSBL, + .phy_status = PHYSTATUS, +}; + static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = { .lanes = 2, @@ -2054,6 +2104,24 @@ static void qmp_pcie_configure(void __iomem *base, qmp_pcie_configure_lane(base, tbl, num, 0xff); } +static void qmp_pcie_init_port_b(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) +{ + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_pcie_offsets *offs = cfg->offsets; + void __iomem *tx3, *rx3, *tx4, *rx4; + + tx3 = qmp->port_b + offs->tx; + rx3 = qmp->port_b + offs->rx; + tx4 = qmp->port_b + offs->tx2; + rx4 = qmp->port_b + offs->rx2; + + qmp_pcie_configure_lane(tx3, 
tbls->tx, tbls->tx_num, 1); + qmp_pcie_configure_lane(rx3, tbls->rx, tbls->rx_num, 1); + + qmp_pcie_configure_lane(tx4, tbls->tx, tbls->tx_num, 2); + qmp_pcie_configure_lane(rx4, tbls->rx, tbls->rx_num, 2); +} + static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls) { const struct qmp_phy_cfg *cfg = qmp->cfg; @@ -2080,6 +2148,11 @@ static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_c qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num); qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num); + + if (cfg->lanes >= 4 && qmp->tcsr_4ln_config) { + qmp_pcie_configure(serdes, cfg->serdes_4ln_tbl, cfg->serdes_4ln_num); + qmp_pcie_init_port_b(qmp, tbls); + } } static int qmp_pcie_init(struct phy *phy) @@ -2452,6 +2525,37 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np return 0; } +static int qmp_pcie_get_4ln_config(struct qmp_pcie *qmp) +{ + struct regmap *tcsr; + unsigned int args[2]; + int ret; + + tcsr = syscon_regmap_lookup_by_phandle_args(qmp->dev->of_node, + "qcom,4ln-config-sel", + ARRAY_SIZE(args), args); + if (IS_ERR(tcsr)) { + ret = PTR_ERR(tcsr); + if (ret == -ENOENT) + return 0; + + dev_err(qmp->dev, "failed to lookup syscon: %d\n", ret); + return ret; + } + + ret = regmap_test_bits(tcsr, args[0], BIT(args[1])); + if (ret < 0) { + dev_err(qmp->dev, "failed to read tcsr: %d\n", ret); + return ret; + } + + qmp->tcsr_4ln_config = ret; + + dev_dbg(qmp->dev, "4ln_config_sel = %d\n", qmp->tcsr_4ln_config); + + return 0; +} + static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) { struct platform_device *pdev = to_platform_device(qmp->dev); @@ -2464,6 +2568,10 @@ static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) if (!offs) return -EINVAL; + ret = qmp_pcie_get_4ln_config(qmp); + if (ret) + return ret; + base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) return PTR_ERR(base); @@ -2479,6 +2587,12 @@ static int qmp_pcie_parse_dt(struct qmp_pcie *qmp) qmp->rx2 = base + offs->rx2; } + if (qmp->cfg->lanes >= 4 && qmp->tcsr_4ln_config) { + qmp->port_b = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(qmp->port_b)) + return PTR_ERR(qmp->port_b); + } + qmp->num_pipe_clks = 2; qmp->pipe_clks[0].id = "pipe"; qmp->pipe_clks[1].id = "pipediv2"; @@ -2582,6 +2696,9 @@ static const struct of_device_id qmp_pcie_of_match_table[] = { }, { .compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy", .data = &sc8280xp_qmp_gen3x2_pciephy_cfg, + }, { + .compatible = "qcom,sc8280xp-qmp-gen3x4-pcie-phy", + .data = &sc8280xp_qmp_gen3x4_pciephy_cfg, }, { .compatible = "qcom,sdm845-qhp-pcie-phy", .data = &sdm845_qhp_pciephy_cfg, -- GitLab From 2584068a9ef4a7bff3b9302dd058a4c95ce68631 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Nov 2022 22:21:24 +0100 Subject: [PATCH 171/694] phy: qcom-qmp-pcie: split pcs_misc init cfg for ipq8074 pcs table Commit af6643242d3a ("phy: qcom-qmp-pcie: split pcs_misc region for ipq6018 pcie gen3") reworked the pcs regs values and removed the 0x400 offset for each pcs_misc regs. This change caused the malfunction of ipq8074 downstream since it still has the legacy pcs table where pcs_misc are not placed on a different table and instead put together assuming the offset of 0x400 for the related pcs_misc regs. Split pcs_misc init cfg from the ipq8074 pcs init table to be handled correctly to prepare for actual support for gen3 pcie for ipq8074. 
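For illustration, the split matters because the QMP driver programs each init table against the base address of its own register region, so pcs_misc entries only land at the right addresses when they sit in a table that is applied to the pcs_misc block. A minimal sketch of that table-driven pattern, with illustrative structure and helper names rather than the driver's exact code (which also carries per-lane masks):

/* Simplified sketch of table-driven PHY init; names are illustrative. */
struct phy_init_entry {
	u32 offset;	/* register offset within one region (pcs, pcs_misc, ...) */
	u32 val;	/* value to program */
};

static void phy_apply_table(void __iomem *base, const struct phy_init_entry *tbl,
			    int num)
{
	int i;

	/* Every entry is written relative to the base of its own region. */
	for (i = 0; i < num; i++)
		writel(tbl[i].val, base + tbl[i].offset);
}

/*
 * Because pcs and pcs_misc are separate regions, their entries must live in
 * separate tables:
 *	phy_apply_table(pcs, cfg->pcs_tbl, cfg->pcs_num);
 *	phy_apply_table(pcs_misc, cfg->pcs_misc_tbl, cfg->pcs_misc_num);
 */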
Fixes: af6643242d3a ("phy: qcom-qmp-pcie: split pcs_misc region for ipq6018 pcie gen3")
Reported-by: Robert Marko
Tested-by: Robert Marko
Signed-off-by: Christian Marangi
Link: https://lore.kernel.org/r/20221103212125.17156-1-ansuelsmth@gmail.com
Signed-off-by: Vinod Koul
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 111716e25b173..43bd4576bee2c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -487,6 +487,13 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_FLL_CNTRL1, 0x01),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_P2U3_WAKEUP_DLY_TIME_AUXCLK_H, 0x0),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_P2U3_WAKEUP_DLY_TIME_AUXCLK_L, 0x1),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_G12S1_TXDEEMPH_M3P5DB, 0x10),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_DCC_CAL_CONFIG, 0x01),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x0d),
+};
+
+static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_misc_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_ACTIONS, 0x0),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_L1P1_WAKEUP_DLY_TIME_AUXCLK_H, 0x00),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_L1P1_WAKEUP_DLY_TIME_AUXCLK_L, 0x01),
@@ -499,11 +506,7 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG2, 0x50),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG4, 0x1a),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x6),
-	QMP_PHY_INIT_CFG(QPHY_V4_PCS_G12S1_TXDEEMPH_M3P5DB, 0x10),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
-	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_DCC_CAL_CONFIG, 0x01),
-	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
-	QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x0d),
 };

 static const struct qmp_phy_init_tbl sdm845_qmp_pcie_serdes_tbl[] = {
@@ -1652,6 +1655,8 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 		.rx_num = ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl),
 		.pcs = ipq8074_pcie_gen3_pcs_tbl,
 		.pcs_num = ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl),
+		.pcs_misc = ipq8074_pcie_gen3_pcs_misc_tbl,
+		.pcs_misc_num = ARRAY_SIZE(ipq8074_pcie_gen3_pcs_misc_tbl),
 	},
 	.clk_list = ipq8074_pciephy_clk_l,
 	.num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l),
--
GitLab

From 9ddcd920f8edfe65c3670fbd0b49db00e1e562fe Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam
Date: Wed, 2 Nov 2022 13:48:34 +0530
Subject: [PATCH 172/694] phy: qcom-qmp-pcie: Fix high latency with 4x2 PHY when ASPM is enabled

The PCIe QMP 4x2 RC PHY generates high latency when ASPM is enabled.
This seems to be fixed by clearing the QPHY_V5_20_PCS_PCIE_PRESET_P10_POST
register of the pcs_misc register space.
Fixes: 2c91bf6bf290 ("phy: qcom-qmp: Add SM8450 PCIe1 PHY support") Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20221102081835.41892-1-manivannan.sadhasivam@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 + drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 43bd4576bee2c..cb45f53a965bf 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1430,6 +1430,7 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = { static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1), QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00), + QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_PRESET_P10_POST, 0x00), }; static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_serdes_tbl[] = { diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h index c9fa90b454756..3d9713d348fe6 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h @@ -11,6 +11,7 @@ #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5 0x084 #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS 0x090 #define QPHY_V5_20_PCS_PCIE_EQ_CONFIG1 0x0a0 +#define QPHY_V5_20_PCS_PCIE_PRESET_P10_POST 0x0e0 #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5 0x108 #define QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN 0x15c #define QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3 0x184 -- GitLab From 883aebf6e1ea88145d64dcf940dbcb5181313338 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 2 Nov 2022 13:48:35 +0530 Subject: [PATCH 173/694] phy: qcom-qmp-pcie: Fix sm8450_qmp_gen4x2_pcie_pcs_tbl[] register names sm8450_qmp_gen4x2_pcie_pcs_tbl[] contains the init sequence for PCS registers of QMP PHY v5.20. So use the v5.20 specific register names. Only major change is the rename of PCS_EQ_CONFIG{2/3} registers to PCS_EQ_CONFIG{4/5}. 
Fixes: 2c91bf6bf290 ("phy: qcom-qmp: Add SM8450 PCIe1 PHY support")
Signed-off-by: Manivannan Sadhasivam
Link: https://lore.kernel.org/r/20221102081835.41892-2-manivannan.sadhasivam@linaro.org
Signed-off-by: Vinod Koul
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 8 ++++----
 drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h | 14 ++++++++++++++
 drivers/phy/qualcomm/phy-qcom-qmp.h | 1 +
 3 files changed, 19 insertions(+), 4 deletions(-)
 create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index cb45f53a965bf..47cccc4b35b22 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1414,10 +1414,10 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rx_tbl[] = {
 };

 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_tbl[] = {
-	QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG2, 0x16),
-	QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG3, 0x22),
-	QMP_PHY_INIT_CFG(QPHY_V5_PCS_G3S2_PRE_GAIN, 0x2e),
-	QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x99),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG4, 0x16),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG5, 0x22),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_G3S2_PRE_GAIN, 0x2e),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_RX_SIGDET_LVL, 0x99),
 };

 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = {
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h
new file mode 100644
index 0000000000000..9a5a20daf62cd
--- /dev/null
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022, Linaro Ltd.
+ */
+
+#ifndef QCOM_PHY_QMP_PCS_V5_20_H_
+#define QCOM_PHY_QMP_PCS_V5_20_H_
+
+#define QPHY_V5_20_PCS_G3S2_PRE_GAIN 0x170
+#define QPHY_V5_20_PCS_RX_SIGDET_LVL 0x188
+#define QPHY_V5_20_PCS_EQ_CONFIG4 0x1e0
+#define QPHY_V5_20_PCS_EQ_CONFIG5 0x1e4
+
+#endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index 26274e3c0cf95..29a48f0436d2a 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -38,6 +38,7 @@
 #include "phy-qcom-qmp-pcs-pcie-v4_20.h"
 #include "phy-qcom-qmp-pcs-v5.h"
+#include "phy-qcom-qmp-pcs-v5_20.h"
 #include "phy-qcom-qmp-pcs-pcie-v5.h"
 #include "phy-qcom-qmp-pcs-usb-v5.h"
 #include "phy-qcom-qmp-pcs-ufs-v5.h"
--
GitLab

From d7abac084536b6d7efcc0c1edc7d9035c34314d9 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda
Date: Tue, 8 Nov 2022 09:54:59 +0900
Subject: [PATCH 174/694] dt-bindings: phy: renesas: Document Renesas Ethernet SERDES

Document Renesas Ethernet SERDES for R-Car S4-8 (r8a779f0).
Signed-off-by: Yoshihiro Shimoda Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221108005500.3011449-2-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- .../phy/renesas,r8a779f0-ether-serdes.yaml | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml diff --git a/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml b/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml new file mode 100644 index 0000000000000..93ab72874228c --- /dev/null +++ b/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/renesas,r8a779f0-ether-serdes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas Ethernet SERDES + +maintainers: + - Yoshihiro Shimoda + +properties: + compatible: + const: renesas,r8a779f0-ether-serdes + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + resets: + maxItems: 1 + + power-domains: + maxItems: 1 + + '#phy-cells': + description: Port number of SERDES. + const: 1 + +required: + - compatible + - reg + - clocks + - resets + - power-domains + - '#phy-cells' + +additionalProperties: false + +examples: + - | + #include + #include + + phy@e6444000 { + compatible = "renesas,r8a779f0-ether-serdes"; + reg = <0xe6444000 0xc00>; + clocks = <&cpg CPG_MOD 1506>; + power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>; + resets = <&cpg 1506>; + #phy-cells = <1>; + }; -- GitLab From 742859441d44be4b408274206244011a09618a91 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 8 Nov 2022 09:55:00 +0900 Subject: [PATCH 175/694] phy: renesas: Add Renesas Ethernet SERDES driver for R-Car S4-8 Add Renesas Ethernet SERDES driver for R-Car S4-8 (r8a779f0). The datasheet describes initialization procedure without any information about registers' name/bits. So, this is all black magic to initialize the hardware. Especially, all channels should be initialized at once. Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20221108005500.3011449-3-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/Kconfig | 8 + drivers/phy/renesas/Makefile | 1 + drivers/phy/renesas/r8a779f0-ether-serdes.c | 417 ++++++++++++++++++++ 3 files changed, 426 insertions(+) create mode 100644 drivers/phy/renesas/r8a779f0-ether-serdes.c diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig index 111bdcae775c8..36505fc5f386e 100644 --- a/drivers/phy/renesas/Kconfig +++ b/drivers/phy/renesas/Kconfig @@ -2,6 +2,14 @@ # # Phy drivers for Renesas platforms # +# NOTE: Please sorted config names alphabetically. +config PHY_R8A779F0_ETHERNET_SERDES + tristate "Renesas R-Car S4-8 Ethernet SERDES driver" + depends on ARCH_RENESAS || COMPILE_TEST + select GENERIC_PHY + help + Support for Ethernet SERDES found on Renesas R-Car S4-8 SoCs. 
+ config PHY_RCAR_GEN2 tristate "Renesas R-Car generation 2 USB PHY driver" depends on ARCH_RENESAS diff --git a/drivers/phy/renesas/Makefile b/drivers/phy/renesas/Makefile index b599ff8a4349c..8896d1919faa6 100644 --- a/drivers/phy/renesas/Makefile +++ b/drivers/phy/renesas/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_PHY_R8A779F0_ETHERNET_SERDES) += r8a779f0-ether-serdes.o obj-$(CONFIG_PHY_RCAR_GEN2) += phy-rcar-gen2.o obj-$(CONFIG_PHY_RCAR_GEN3_PCIE) += phy-rcar-gen3-pcie.o obj-$(CONFIG_PHY_RCAR_GEN3_USB2) += phy-rcar-gen3-usb2.o diff --git a/drivers/phy/renesas/r8a779f0-ether-serdes.c b/drivers/phy/renesas/r8a779f0-ether-serdes.c new file mode 100644 index 0000000000000..ec6594e6dc275 --- /dev/null +++ b/drivers/phy/renesas/r8a779f0-ether-serdes.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Renesas Ethernet SERDES device driver + * + * Copyright (C) 2022 Renesas Electronics Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define R8A779F0_ETH_SERDES_NUM 3 +#define R8A779F0_ETH_SERDES_OFFSET 0x0400 +#define R8A779F0_ETH_SERDES_BANK_SELECT 0x03fc +#define R8A779F0_ETH_SERDES_TIMEOUT_US 100000 +#define R8A779F0_ETH_SERDES_NUM_RETRY_LINKUP 3 +#define R8A779F0_ETH_SERDES_NUM_RETRY_INIT 3 + +struct r8a779f0_eth_serdes_drv_data; +struct r8a779f0_eth_serdes_channel { + struct r8a779f0_eth_serdes_drv_data *dd; + struct phy *phy; + void __iomem *addr; + phy_interface_t phy_interface; + int speed; + int index; +}; + +struct r8a779f0_eth_serdes_drv_data { + void __iomem *addr; + struct platform_device *pdev; + struct reset_control *reset; + struct r8a779f0_eth_serdes_channel channel[R8A779F0_ETH_SERDES_NUM]; + bool initialized; +}; + +/* + * The datasheet describes initialization procedure without any information + * about registers' name/bits. So, this is all black magic to initialize + * the hardware. 
+ */ +static void r8a779f0_eth_serdes_write32(void __iomem *addr, u32 offs, u32 bank, u32 data) +{ + iowrite32(bank, addr + R8A779F0_ETH_SERDES_BANK_SELECT); + iowrite32(data, addr + offs); +} + +static int +r8a779f0_eth_serdes_reg_wait(struct r8a779f0_eth_serdes_channel *channel, + u32 offs, u32 bank, u32 mask, u32 expected) +{ + int ret; + u32 val; + + iowrite32(bank, channel->addr + R8A779F0_ETH_SERDES_BANK_SELECT); + + ret = readl_poll_timeout_atomic(channel->addr + offs, val, + (val & mask) == expected, + 1, R8A779F0_ETH_SERDES_TIMEOUT_US); + if (ret) + dev_dbg(&channel->phy->dev, + "%s: index %d, offs %x, bank %x, mask %x, expected %x\n", + __func__, channel->index, offs, bank, mask, expected); + + return ret; +} + +static int +r8a779f0_eth_serdes_common_init_ram(struct r8a779f0_eth_serdes_drv_data *dd) +{ + struct r8a779f0_eth_serdes_channel *channel; + int i, ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + channel = &dd->channel[i]; + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x026c, 0x180, BIT(0), 0x01); + if (ret) + return ret; + } + + r8a779f0_eth_serdes_write32(dd->addr, 0x026c, 0x180, 0x03); + + return ret; +} + +static int +r8a779f0_eth_serdes_common_setting(struct r8a779f0_eth_serdes_channel *channel) +{ + struct r8a779f0_eth_serdes_drv_data *dd = channel->dd; + + switch (channel->phy_interface) { + case PHY_INTERFACE_MODE_SGMII: + r8a779f0_eth_serdes_write32(dd->addr, 0x0244, 0x180, 0x0097); + r8a779f0_eth_serdes_write32(dd->addr, 0x01d0, 0x180, 0x0060); + r8a779f0_eth_serdes_write32(dd->addr, 0x01d8, 0x180, 0x2200); + r8a779f0_eth_serdes_write32(dd->addr, 0x01d4, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(dd->addr, 0x01e0, 0x180, 0x003d); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +r8a779f0_eth_serdes_chan_setting(struct r8a779f0_eth_serdes_channel *channel) +{ + int ret; + + switch (channel->phy_interface) { + case PHY_INTERFACE_MODE_SGMII: + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x380, 0x2000); + r8a779f0_eth_serdes_write32(channel->addr, 0x01c0, 0x180, 0x0011); + r8a779f0_eth_serdes_write32(channel->addr, 0x0248, 0x180, 0x0540); + r8a779f0_eth_serdes_write32(channel->addr, 0x0258, 0x180, 0x0015); + r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0100); + r8a779f0_eth_serdes_write32(channel->addr, 0x01a0, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(channel->addr, 0x00d0, 0x180, 0x0002); + r8a779f0_eth_serdes_write32(channel->addr, 0x0150, 0x180, 0x0003); + r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x180, 0x0100); + r8a779f0_eth_serdes_write32(channel->addr, 0x0148, 0x180, 0x0100); + r8a779f0_eth_serdes_write32(channel->addr, 0x0174, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(channel->addr, 0x0160, 0x180, 0x0007); + r8a779f0_eth_serdes_write32(channel->addr, 0x01ac, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(channel->addr, 0x00c4, 0x180, 0x0310); + r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x380, 0x0101); + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x00c8, 0x0180, BIT(0), 0); + if (ret) + return ret; + + r8a779f0_eth_serdes_write32(channel->addr, 0x0148, 0x180, 0x0101); + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0148, 0x0180, BIT(0), 0); + if (ret) + return ret; + + r8a779f0_eth_serdes_write32(channel->addr, 0x00c4, 0x180, 0x1310); + r8a779f0_eth_serdes_write32(channel->addr, 0x00d8, 0x180, 0x1800); + r8a779f0_eth_serdes_write32(channel->addr, 0x00dc, 0x180, 0x0000); + r8a779f0_eth_serdes_write32(channel->addr, 0x001c, 0x300, 0x0001); + 
r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x380, 0x2100); + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0000, 0x0380, BIT(8), 0); + if (ret) + return ret; + + if (channel->speed == 1000) + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x0140); + else if (channel->speed == 100) + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x2100); + + /* For AN_ON */ + r8a779f0_eth_serdes_write32(channel->addr, 0x0004, 0x1f80, 0x0005); + r8a779f0_eth_serdes_write32(channel->addr, 0x0028, 0x1f80, 0x07a1); + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f80, 0x0208); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int +r8a779f0_eth_serdes_chan_speed(struct r8a779f0_eth_serdes_channel *channel) +{ + int ret; + + switch (channel->phy_interface) { + case PHY_INTERFACE_MODE_SGMII: + /* For AN_ON */ + if (channel->speed == 1000) + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x1140); + else if (channel->speed == 100) + r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x3100); + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0008, 0x1f80, BIT(0), 1); + if (ret) + return ret; + r8a779f0_eth_serdes_write32(channel->addr, 0x0008, 0x1f80, 0x0000); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + + +static int r8a779f0_eth_serdes_monitor_linkup(struct r8a779f0_eth_serdes_channel *channel) +{ + int i, ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM_RETRY_LINKUP; i++) { + ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0004, 0x300, + BIT(2), BIT(2)); + if (!ret) + break; + + /* restart */ + r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0100); + udelay(1); + r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0000); + } + + return ret; +} + +static int r8a779f0_eth_serdes_hw_init(struct r8a779f0_eth_serdes_channel *channel) +{ + struct r8a779f0_eth_serdes_drv_data *dd = channel->dd; + int i, ret; + + if (dd->initialized) + return 0; + + ret = r8a779f0_eth_serdes_common_init_ram(dd); + if (ret) + return ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + ret = r8a779f0_eth_serdes_reg_wait(&dd->channel[i], 0x0000, + 0x300, BIT(15), 0); + if (ret) + return ret; + } + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) + r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d4, 0x380, 0x0443); + + ret = r8a779f0_eth_serdes_common_setting(channel); + if (ret) + return ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) + r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d0, 0x380, 0x0001); + + + r8a779f0_eth_serdes_write32(dd->addr, 0x0000, 0x380, 0x8000); + + ret = r8a779f0_eth_serdes_common_init_ram(dd); + if (ret) + return ret; + + ret = r8a779f0_eth_serdes_reg_wait(&dd->channel[0], 0x0000, 0x380, BIT(15), 0); + if (ret) + return ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + ret = r8a779f0_eth_serdes_chan_setting(&dd->channel[i]); + if (ret) + return ret; + } + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + ret = r8a779f0_eth_serdes_chan_speed(&dd->channel[i]); + if (ret) + return ret; + } + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) + r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03c0, 0x380, 0x0000); + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) + r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d0, 0x380, 0x0000); + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + ret = r8a779f0_eth_serdes_monitor_linkup(&dd->channel[i]); + if (ret) + return ret; + } + + return 0; +} + +static int r8a779f0_eth_serdes_init(struct phy *p) +{ + struct 
r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p); + int i, ret; + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM_RETRY_INIT; i++) { + ret = r8a779f0_eth_serdes_hw_init(channel); + if (!ret) { + channel->dd->initialized = true; + break; + } + usleep_range(1000, 2000); + } + + return ret; +} + +static int r8a779f0_eth_serdes_set_mode(struct phy *p, enum phy_mode mode, + int submode) +{ + struct r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p); + + if (mode != PHY_MODE_ETHERNET) + return -EOPNOTSUPP; + + switch (submode) { + case PHY_INTERFACE_MODE_GMII: + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_USXGMII: + channel->phy_interface = submode; + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int r8a779f0_eth_serdes_set_speed(struct phy *p, int speed) +{ + struct r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p); + + channel->speed = speed; + + return 0; +} + +static const struct phy_ops r8a779f0_eth_serdes_ops = { + .init = r8a779f0_eth_serdes_init, + .set_mode = r8a779f0_eth_serdes_set_mode, + .set_speed = r8a779f0_eth_serdes_set_speed, +}; + +static struct phy *r8a779f0_eth_serdes_xlate(struct device *dev, + struct of_phandle_args *args) +{ + struct r8a779f0_eth_serdes_drv_data *dd = dev_get_drvdata(dev); + + if (args->args[0] >= R8A779F0_ETH_SERDES_NUM) + return ERR_PTR(-ENODEV); + + return dd->channel[args->args[0]].phy; +} + +static const struct of_device_id r8a779f0_eth_serdes_of_table[] = { + { .compatible = "renesas,r8a779f0-ether-serdes", }, + { } +}; +MODULE_DEVICE_TABLE(of, r8a779f0_eth_serdes_of_table); + +static int r8a779f0_eth_serdes_probe(struct platform_device *pdev) +{ + struct r8a779f0_eth_serdes_drv_data *dd; + struct phy_provider *provider; + struct resource *res; + int i; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "invalid resource\n"); + return -EINVAL; + } + + dd = devm_kzalloc(&pdev->dev, sizeof(*dd), GFP_KERNEL); + if (!dd) + return -ENOMEM; + + platform_set_drvdata(pdev, dd); + dd->pdev = pdev; + dd->addr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dd->addr)) + return PTR_ERR(dd->addr); + + dd->reset = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(dd->reset)) + return PTR_ERR(dd->reset); + + reset_control_reset(dd->reset); + + for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) { + struct r8a779f0_eth_serdes_channel *channel = &dd->channel[i]; + + channel->phy = devm_phy_create(&pdev->dev, NULL, + &r8a779f0_eth_serdes_ops); + if (IS_ERR(channel->phy)) + return PTR_ERR(channel->phy); + channel->addr = dd->addr + R8A779F0_ETH_SERDES_OFFSET * i; + channel->dd = dd; + channel->index = i; + phy_set_drvdata(channel->phy, channel); + } + + provider = devm_of_phy_provider_register(&pdev->dev, + r8a779f0_eth_serdes_xlate); + if (IS_ERR(provider)) + return PTR_ERR(provider); + + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + return 0; +} + +static int r8a779f0_eth_serdes_remove(struct platform_device *pdev) +{ + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver r8a779f0_eth_serdes_driver_platform = { + .probe = r8a779f0_eth_serdes_probe, + .remove = r8a779f0_eth_serdes_remove, + .driver = { + .name = "r8a779f0_eth_serdes", + .of_match_table = r8a779f0_eth_serdes_of_table, + } +}; +module_platform_driver(r8a779f0_eth_serdes_driver_platform); +MODULE_AUTHOR("Yoshihiro Shimoda"); +MODULE_DESCRIPTION("Renesas Ethernet SERDES device driver"); 
+MODULE_LICENSE("GPL");
--
GitLab

From 4c2e9ba05c7abac58bdb58e47eb69b156027fb7b Mon Sep 17 00:00:00 2001
From: Tudor Ambarus
Date: Tue, 25 Oct 2022 12:02:50 +0300
Subject: [PATCH 176/694] dmaengine: at_hdmac: Do not print messages on console while holding the lock

The descriptor was already removed from the transfer list, so there is no
reason to keep the channel lock while printing the descriptor info; do the
prints without holding the lock.

Signed-off-by: Tudor Ambarus
Acked-by: Nicolas Ferre
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-17-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul
---
 drivers/dma/at_hdmac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 858bd64f13135..f365ac4d87ffe 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -549,6 +549,8 @@ static void atc_handle_error(struct at_dma_chan *atchan)
 		atc_dostart(atchan, desc);
 	}

+	spin_unlock_irqrestore(&atchan->lock, flags);
+
 	/*
 	 * KERN_CRITICAL may seem harsh, but since this only happens
 	 * when someone submits a bad physical address in a
@@ -564,8 +566,6 @@ static void atc_handle_error(struct at_dma_chan *atchan)
 	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
 		atc_dump_lli(atchan, &child->lli);

-	spin_unlock_irqrestore(&atchan->lock, flags);
-
 	/* Pretend the descriptor completed successfully */
 	atc_chain_complete(atchan, bad_desc);
 }
--
GitLab

From 83c196152fc9a93c3d5a7cbf3229f42f87f232d8 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus
Date: Tue, 25 Oct 2022 12:02:51 +0300
Subject: [PATCH 177/694] dmaengine: at_hdmac: Return dma_cookie_status()'s ret code when txstate is NULL

txstate is an optional parameter used to get a struct with auxiliary
transfer status information. When it is not provided, the call to
device_tx_status() should return the status of the dma cookie. Return the
status of the dma cookie when the txstate optional parameter is not
provided.

Signed-off-by: Tudor Ambarus
Acked-by: Nicolas Ferre
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-18-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul
---
 drivers/dma/at_hdmac.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index f365ac4d87ffe..10b6b0435d52e 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1461,14 +1461,8 @@ atc_tx_status(struct dma_chan *chan,
 	int bytes = 0;

 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_COMPLETE)
+	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
-	/*
-	 * There's no point calculating the residue if there's
-	 * no txstate to store the value.
-	 */
-	if (!txstate)
-		return DMA_ERROR;

 	spin_lock_irqsave(&atchan->lock, flags);
--
GitLab

From 0e75c28c52962b528947843e947b4bd0c74d40d2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus
Date: Tue, 25 Oct 2022 12:02:52 +0300
Subject: [PATCH 178/694] dmaengine: at_hdmac: Remove superfluous cast

Conversions of void * are applied automatically when other pointer types
are assigned to and from void *. Remove the superfluous cast.
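For context, C converts to and from void * implicitly, so a cast on such an assignment adds noise without changing behaviour. A tiny standalone illustration, not taken from the driver:

#include <stdio.h>

static void handler(void *dev_id)
{
	/* A void * converts to any object pointer type without a cast. */
	int *counter = dev_id;

	(*counter)++;
}

int main(void)
{
	int hits = 0;

	handler(&hits);		/* &hits likewise converts to void * implicitly */
	printf("%d\n", hits);
	return 0;
}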
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-19-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 10b6b0435d52e..fbfb207104e98 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -603,7 +603,7 @@ static void atc_tasklet(struct tasklet_struct *t) static irqreturn_t at_dma_interrupt(int irq, void *dev_id) { - struct at_dma *atdma = (struct at_dma *)dev_id; + struct at_dma *atdma = dev_id; struct at_dma_chan *atchan; int i; u32 status, pending, imr; -- GitLab From f5d79afa3a858eb6e5cf2bcd894ed53b5bd8b5ff Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:53 +0300 Subject: [PATCH 179/694] dmaengine: at_hdmac: Pass residue by address to avoid unnecessary implicit casts struct dma_tx_state defines residue as u32. atc_get_bytes_left() returned an int which could be either an error or the value of the residue. This could cause problems if the controller supported a u32 buffer transfer size and the u32 value was past the max int can hold. Our controller does not support u32 buffer transfer size, but even so, improve the code and pass the residue by address to avoid unnecessary implicit casts and make atc_get_bytes_left() return 0 on success or -errno on errors. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-20-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 54 +++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index fbfb207104e98..e2c46f32b2840 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -293,7 +293,7 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, * @current_len: the number of bytes left before reading CTRLA * @ctrla: the value of CTRLA */ -static inline int atc_calc_bytes_left(int current_len, u32 ctrla) +static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) { u32 btsize = (ctrla & ATC_BTSIZE_MAX); u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); @@ -308,17 +308,20 @@ static inline int atc_calc_bytes_left(int current_len, u32 ctrla) } /** - * atc_get_bytes_left - get the number of bytes residue for a cookie + * atc_get_bytes_left - get the number of bytes residue for a cookie. + * The residue is passed by address and updated on success. * @chan: DMA channel * @cookie: transaction identifier to check status of + * @residue: residue to be updated. + * Return 0 on success, -errono otherwise. 
*/ -static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) +static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie, + u32 *residue) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; - int ret; - u32 ctrla, dscr; + u32 len, ctrla, dscr; unsigned int i; /* @@ -333,7 +336,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) return desc->total_len; /* cookie matches to the currently running transfer */ - ret = desc_first->total_len; + len = desc_first->total_len; if (desc_first->lli.dscr) { /* hardware linked list transfer */ @@ -419,29 +422,31 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) return -ETIMEDOUT; /* for the first descriptor we can be more accurate */ - if (desc_first->lli.dscr == dscr) - return atc_calc_bytes_left(ret, ctrla); + if (desc_first->lli.dscr == dscr) { + *residue = atc_calc_bytes_left(len, ctrla); + return 0; + } - ret -= desc_first->len; + len -= desc_first->len; list_for_each_entry(desc, &desc_first->tx_list, desc_node) { if (desc->lli.dscr == dscr) break; - ret -= desc->len; + len -= desc->len; } /* * For the current descriptor in the chain we can calculate * the remaining bytes using the channel's register. */ - ret = atc_calc_bytes_left(ret, ctrla); + *residue = atc_calc_bytes_left(len, ctrla); } else { /* single transfer */ ctrla = channel_readl(atchan, CTRLA); - ret = atc_calc_bytes_left(ret, ctrla); + *residue = atc_calc_bytes_left(len, ctrla); } - return ret; + return 0; } /** @@ -1457,31 +1462,32 @@ atc_tx_status(struct dma_chan *chan, { struct at_dma_chan *atchan = to_at_dma_chan(chan); unsigned long flags; - enum dma_status ret; - int bytes = 0; + enum dma_status dma_status; + u32 residue; + int ret; - ret = dma_cookie_status(chan, cookie, txstate); - if (ret == DMA_COMPLETE || !txstate) - return ret; + dma_status = dma_cookie_status(chan, cookie, txstate); + if (dma_status == DMA_COMPLETE || !txstate) + return dma_status; spin_lock_irqsave(&atchan->lock, flags); /* Get number of bytes left in the active transactions */ - bytes = atc_get_bytes_left(chan, cookie); + ret = atc_get_bytes_left(chan, cookie, &residue); spin_unlock_irqrestore(&atchan->lock, flags); - if (unlikely(bytes < 0)) { + if (unlikely(ret < 0)) { dev_vdbg(chan2dev(chan), "get residual bytes error\n"); return DMA_ERROR; } else { - dma_set_residue(txstate, bytes); + dma_set_residue(txstate, residue); } - dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %d\n", - ret, cookie, bytes); + dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %u\n", + dma_status, cookie, residue); - return ret; + return dma_status; } /** -- GitLab From 91617bf6bb41b32119f5ac007871ad1d115d4ed2 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:54 +0300 Subject: [PATCH 180/694] dmaengine: at_hdmac: s/atc_get_bytes_left/atc_get_residue Use dmaengine terminology and rename the method to better indicate what it does: it gets the residue value which will be later on set with dma_set_residue(). 
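The new name follows the dmaengine residue-reporting convention, which in a device_tx_status() callback looks roughly like the sketch below; dma_cookie_status() and dma_set_residue() are the real framework helpers, while the foo_* names are illustrative:

static enum dma_status foo_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
				     struct dma_tx_state *txstate)
{
	enum dma_status status;
	u32 residue;
	int ret;

	status = dma_cookie_status(chan, cookie, txstate);
	if (status == DMA_COMPLETE || !txstate)
		return status;

	/* Driver-specific helper: bytes still pending for this cookie. */
	ret = foo_get_residue(chan, cookie, &residue);
	if (ret < 0)
		return DMA_ERROR;

	dma_set_residue(txstate, residue);
	return status;
}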
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-21-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index e2c46f32b2840..6c328cd169832 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -308,15 +308,15 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) } /** - * atc_get_bytes_left - get the number of bytes residue for a cookie. + * atc_get_residue - get the number of bytes residue for a cookie. * The residue is passed by address and updated on success. * @chan: DMA channel * @cookie: transaction identifier to check status of * @residue: residue to be updated. * Return 0 on success, -errono otherwise. */ -static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie, - u32 *residue) +static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, + u32 *residue) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_desc *desc_first = atc_first_active(atchan); @@ -1471,10 +1471,7 @@ atc_tx_status(struct dma_chan *chan, return dma_status; spin_lock_irqsave(&atchan->lock, flags); - - /* Get number of bytes left in the active transactions */ - ret = atc_get_bytes_left(chan, cookie, &residue); - + ret = atc_get_residue(chan, cookie, &residue); spin_unlock_irqrestore(&atchan->lock, flags); if (unlikely(ret < 0)) { -- GitLab From b50cf4bdfb9164c55d002624217d5a5ef4ab9573 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:55 +0300 Subject: [PATCH 181/694] dmaengine: at_hdmac: Introduce atc_get_llis_residue() Introduce a method to get the residue for a hardware linked list transfer. It makes the code easier to read. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-22-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 221 ++++++++++++++++++++--------------------- 1 file changed, 110 insertions(+), 111 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 6c328cd169832..6bd9e35db8f9b 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -307,6 +307,109 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) return current_len - (btsize << src_width); } +/** + * atc_get_llis_residue - Get residue for a hardware linked list transfer + * + * Calculate the residue by removing the length of the child descriptors already + * transferred from the total length. To get the current child descriptor we can + * use the value of the channel's DSCR register and compare it against the value + * of the hardware linked list structure of each child descriptor. + * + * The CTRLA register provides us with the amount of data already read from the + * source for the current child descriptor. So we can compute a more accurate + * residue by also removing the number of bytes corresponding to this amount of + * data. + * + * However, the DSCR and CTRLA registers cannot be read both atomically. 
Hence a + * race condition may occur: the first read register may refer to one child + * descriptor whereas the second read may refer to a later child descriptor in + * the list because of the DMA transfer progression inbetween the two reads. + * + * One solution could have been to pause the DMA transfer, read the DSCR and + * CTRLA then resume the DMA transfer. Nonetheless, this approach presents some + * drawbacks: + * - If the DMA transfer is paused, RX overruns or TX underruns are more likey + * to occur depending on the system latency. Taking the USART driver as an + * example, it uses a cyclic DMA transfer to read data from the Receive + * Holding Register (RHR) to avoid RX overruns since the RHR is not protected + * by any FIFO on most Atmel SoCs. So pausing the DMA transfer to compute the + * residue would break the USART driver design. + * - The atc_pause() function masks interrupts but we'd rather avoid to do so + * for system latency purpose. + * + * Then we'd rather use another solution: the DSCR is read a first time, the + * CTRLA is read in turn, next the DSCR is read a second time. If the two + * consecutive read values of the DSCR are the same then we assume both refers + * to the very same child descriptor as well as the CTRLA value read inbetween + * does. For cyclic tranfers, the assumption is that a full loop is "not so + * fast". If the two DSCR values are different, we read again the CTRLA then the + * DSCR till two consecutive read values from DSCR are equal or till the + * maximum trials is reach. This algorithm is very unlikely not to find a stable + * value for DSCR. + * @atchan: pointer to an atmel hdmac channel. + * @desc: pointer to the descriptor for which the residue is calculated. + * @residue: residue to be set to dma_tx_state. + * Returns 0 on success, -errno otherwise. + */ +static int atc_get_llis_residue(struct at_dma_chan *atchan, + struct at_desc *desc, u32 *residue) +{ + struct at_desc *child; + u32 len, ctrla, dscr; + unsigned int i; + + len = desc->total_len; + dscr = channel_readl(atchan, DSCR); + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) { + u32 new_dscr; + + rmb(); /* ensure DSCR is read after CTRLA */ + new_dscr = channel_readl(atchan, DSCR); + + /* + * If the DSCR register value has not changed inside the DMA + * controller since the previous read, we assume that both the + * dscr and ctrla values refers to the very same descriptor. + */ + if (likely(new_dscr == dscr)) + break; + + /* + * DSCR has changed inside the DMA controller, so the previouly + * read value of CTRLA may refer to an already processed + * descriptor hence could be outdated. We need to update ctrla + * to match the current descriptor. + */ + dscr = new_dscr; + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + } + if (unlikely(i == ATC_MAX_DSCR_TRIALS)) + return -ETIMEDOUT; + + /* For the first descriptor we can be more accurate. */ + if (desc->lli.dscr == dscr) { + *residue = atc_calc_bytes_left(len, ctrla); + return 0; + } + + len -= desc->len; + list_for_each_entry(child, &desc->tx_list, desc_node) { + if (child->lli.dscr == dscr) + break; + len -= child->len; + } + + /* + * For the current descriptor in the chain we can calculate the + * remaining bytes using the channel's register. + */ + *residue = atc_calc_bytes_left(len, ctrla); + return 0; +} + /** * atc_get_residue - get the number of bytes residue for a cookie. 
* The residue is passed by address and updated on success. @@ -321,8 +424,7 @@ static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; - u32 len, ctrla, dscr; - unsigned int i; + u32 len, ctrla; /* * If the cookie doesn't match to the currently running transfer then @@ -335,117 +437,14 @@ static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, else if (desc != desc_first) return desc->total_len; - /* cookie matches to the currently running transfer */ - len = desc_first->total_len; - - if (desc_first->lli.dscr) { + if (desc_first->lli.dscr) /* hardware linked list transfer */ + return atc_get_llis_residue(atchan, desc_first, residue); - /* - * Calculate the residue by removing the length of the child - * descriptors already transferred from the total length. - * To get the current child descriptor we can use the value of - * the channel's DSCR register and compare it against the value - * of the hardware linked list structure of each child - * descriptor. - * - * The CTRLA register provides us with the amount of data - * already read from the source for the current child - * descriptor. So we can compute a more accurate residue by also - * removing the number of bytes corresponding to this amount of - * data. - * - * However, the DSCR and CTRLA registers cannot be read both - * atomically. Hence a race condition may occur: the first read - * register may refer to one child descriptor whereas the second - * read may refer to a later child descriptor in the list - * because of the DMA transfer progression inbetween the two - * reads. - * - * One solution could have been to pause the DMA transfer, read - * the DSCR and CTRLA then resume the DMA transfer. Nonetheless, - * this approach presents some drawbacks: - * - If the DMA transfer is paused, RX overruns or TX underruns - * are more likey to occur depending on the system latency. - * Taking the USART driver as an example, it uses a cyclic DMA - * transfer to read data from the Receive Holding Register - * (RHR) to avoid RX overruns since the RHR is not protected - * by any FIFO on most Atmel SoCs. So pausing the DMA transfer - * to compute the residue would break the USART driver design. - * - The atc_pause() function masks interrupts but we'd rather - * avoid to do so for system latency purpose. - * - * Then we'd rather use another solution: the DSCR is read a - * first time, the CTRLA is read in turn, next the DSCR is read - * a second time. If the two consecutive read values of the DSCR - * are the same then we assume both refers to the very same - * child descriptor as well as the CTRLA value read inbetween - * does. For cyclic tranfers, the assumption is that a full loop - * is "not so fast". - * If the two DSCR values are different, we read again the CTRLA - * then the DSCR till two consecutive read values from DSCR are - * equal or till the maxium trials is reach. - * This algorithm is very unlikely not to find a stable value for - * DSCR. 
- */ - - dscr = channel_readl(atchan, DSCR); - rmb(); /* ensure DSCR is read before CTRLA */ - ctrla = channel_readl(atchan, CTRLA); - for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) { - u32 new_dscr; - - rmb(); /* ensure DSCR is read after CTRLA */ - new_dscr = channel_readl(atchan, DSCR); - - /* - * If the DSCR register value has not changed inside the - * DMA controller since the previous read, we assume - * that both the dscr and ctrla values refers to the - * very same descriptor. - */ - if (likely(new_dscr == dscr)) - break; - - /* - * DSCR has changed inside the DMA controller, so the - * previouly read value of CTRLA may refer to an already - * processed descriptor hence could be outdated. - * We need to update ctrla to match the current - * descriptor. - */ - dscr = new_dscr; - rmb(); /* ensure DSCR is read before CTRLA */ - ctrla = channel_readl(atchan, CTRLA); - } - if (unlikely(i == ATC_MAX_DSCR_TRIALS)) - return -ETIMEDOUT; - - /* for the first descriptor we can be more accurate */ - if (desc_first->lli.dscr == dscr) { - *residue = atc_calc_bytes_left(len, ctrla); - return 0; - } - - len -= desc_first->len; - list_for_each_entry(desc, &desc_first->tx_list, desc_node) { - if (desc->lli.dscr == dscr) - break; - - len -= desc->len; - } - - /* - * For the current descriptor in the chain we can calculate - * the remaining bytes using the channel's register. - */ - *residue = atc_calc_bytes_left(len, ctrla); - } else { - /* single transfer */ - ctrla = channel_readl(atchan, CTRLA); - *residue = atc_calc_bytes_left(len, ctrla); - } - + /* single transfer */ + len = desc_first->total_len; + ctrla = channel_readl(atchan, CTRLA); + *residue = atc_calc_bytes_left(len, ctrla); return 0; } -- GitLab From 5f1d429b43b34b310a93651681d0cd8a39a86e3d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:56 +0300 Subject: [PATCH 182/694] dmaengine: at_hdmac: Use devm_kzalloc() and struct_size() Use the resource-managed kzalloc to simplify error logic. Memory allocated with this function is automatically freed on driver detach. Use struct_size() helper to calculate the size of the atdma structure with its trailing flexible array. While here, move the mem allocation higher in the probe method, as failing to allocate memory indicates a serious system issue, and everything else does not matter anyway. All these help the code look a bit cleaner. 
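For reference, struct_size() computes the size of a structure plus its trailing flexible array with overflow checking, which pairs naturally with a device-managed allocation. A minimal sketch under illustrative names (not the driver's structures):

struct foo_chan {
	int id;
};

struct foo_dev {
	int nr_chan;
	struct foo_chan chan[];		/* trailing flexible array member */
};

static int foo_alloc(struct platform_device *pdev, unsigned int nr_chan)
{
	struct foo_dev *foo;

	/*
	 * struct_size(foo, chan, nr_chan) expands to
	 * sizeof(*foo) + nr_chan * sizeof(foo->chan[0]), with overflow checks.
	 * devm_ memory is freed automatically when the driver detaches.
	 */
	foo = devm_kzalloc(&pdev->dev, struct_size(foo, chan, nr_chan), GFP_KERNEL);
	if (!foo)
		return -ENOMEM;

	foo->nr_chan = nr_chan;
	return 0;
}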
Signed-off-by: Tudor Ambarus
Acked-by: Nicolas Ferre
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-23-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul
---
 drivers/dma/at_hdmac.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 6bd9e35db8f9b..f3fbb0aa8b241 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -1786,6 +1787,12 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	if (!plat_dat)
 		return -ENODEV;

+	atdma = devm_kzalloc(&pdev->dev,
+			     struct_size(atdma, chan, plat_dat->nr_channels),
+			     GFP_KERNEL);
+	if (!atdma)
+		return -ENOMEM;
+
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!io)
 		return -EINVAL;
@@ -1794,21 +1801,13 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;

-	size = sizeof(struct at_dma);
-	size += plat_dat->nr_channels * sizeof(struct at_dma_chan);
-	atdma = kzalloc(size, GFP_KERNEL);
-	if (!atdma)
-		return -ENOMEM;
-
 	/* discover transaction capabilities */
 	atdma->dma_common.cap_mask = plat_dat->cap_mask;
 	atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;

 	size = resource_size(io);
-	if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
-		err = -EBUSY;
-		goto err_kfree;
-	}
+	if (!request_mem_region(io->start, size, pdev->dev.driver->name))
+		return -EBUSY;

 	atdma->regs = ioremap(io->start, size);
 	if (!atdma->regs) {
@@ -1963,8 +1962,6 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	atdma->regs = NULL;
 err_release_r:
 	release_mem_region(io->start, size);
-err_kfree:
-	kfree(atdma);
 	return err;
 }

@@ -2003,8 +2000,6 @@ static int at_dma_remove(struct platform_device *pdev)
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	release_mem_region(io->start, resource_size(io));

-	kfree(atdma);
-
 	return 0;
 }
--
GitLab

From 8bfe4a61d40df2ddb707bf7d0b278907de2dd4f6 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus
Date: Tue, 25 Oct 2022 12:02:57 +0300
Subject: [PATCH 183/694] dmaengine: at_hdmac: Use devm_platform_ioremap_resource

Use devm_platform_ioremap_resource() helper for cleaner code and easier
resource management.
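In rough terms, the helper collapses the lookup/request/map sequence into one managed call; a hedged sketch of the two styles in a probe path (error handling trimmed, names illustrative):

/* Open-coded style being replaced, roughly equivalent to: */
res  = platform_get_resource(pdev, IORESOURCE_MEM, 0);
base = devm_ioremap_resource(&pdev->dev, res);

/* Managed one-liner: looks up MEM resource 0, requests and maps it. */
base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base))
	return PTR_ERR(base);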
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-24-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 37 ++++++------------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index f3fbb0aa8b241..10c250618a338 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1765,9 +1765,7 @@ static void at_dma_off(struct at_dma *atdma) static int __init at_dma_probe(struct platform_device *pdev) { - struct resource *io; struct at_dma *atdma; - size_t size; int irq; int err; int i; @@ -1793,9 +1791,9 @@ static int __init at_dma_probe(struct platform_device *pdev) if (!atdma) return -ENOMEM; - io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!io) - return -EINVAL; + atdma->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(atdma->regs)) + return PTR_ERR(atdma->regs); irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -1805,21 +1803,10 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.cap_mask = plat_dat->cap_mask; atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; - size = resource_size(io); - if (!request_mem_region(io->start, size, pdev->dev.driver->name)) - return -EBUSY; - - atdma->regs = ioremap(io->start, size); - if (!atdma->regs) { - err = -ENOMEM; - goto err_release_r; - } - atdma->clk = clk_get(&pdev->dev, "dma_clk"); - if (IS_ERR(atdma->clk)) { - err = PTR_ERR(atdma->clk); - goto err_clk; - } + if (IS_ERR(atdma->clk)) + return PTR_ERR(atdma->clk); + err = clk_prepare_enable(atdma->clk); if (err) goto err_clk_prepare; @@ -1957,11 +1944,6 @@ static int __init at_dma_probe(struct platform_device *pdev) clk_disable_unprepare(atdma->clk); err_clk_prepare: clk_put(atdma->clk); -err_clk: - iounmap(atdma->regs); - atdma->regs = NULL; -err_release_r: - release_mem_region(io->start, size); return err; } @@ -1969,7 +1951,6 @@ static int at_dma_remove(struct platform_device *pdev) { struct at_dma *atdma = platform_get_drvdata(pdev); struct dma_chan *chan, *_chan; - struct resource *io; at_dma_off(atdma); if (pdev->dev.of_node) @@ -1994,12 +1975,6 @@ static int at_dma_remove(struct platform_device *pdev) clk_disable_unprepare(atdma->clk); clk_put(atdma->clk); - iounmap(atdma->regs); - atdma->regs = NULL; - - io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(io->start, resource_size(io)); - return 0; } -- GitLab From 4c15a4c7f693f1f45ef534ddc428f2a9aa67bd13 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:58 +0300 Subject: [PATCH 184/694] dmaengine: at_hdmac: Use devm_clk_get() Clocks that are get with this method will be automatically put on driver detach. Use devm_clk_get() and simplify the error handling. 
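For context, a managed clock lookup needs no matching clk_put() on the error or remove paths; a minimal sketch of the usual probe-time pattern (the "dma_clk" name comes from this driver, the rest is illustrative):

clk = devm_clk_get(&pdev->dev, "dma_clk");
if (IS_ERR(clk))
	return PTR_ERR(clk);	/* no clk_put() needed on any path */

ret = clk_prepare_enable(clk);
if (ret)
	return ret;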
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-25-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 10c250618a338..444aa7d75ff53 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1803,13 +1803,13 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.cap_mask = plat_dat->cap_mask; atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; - atdma->clk = clk_get(&pdev->dev, "dma_clk"); + atdma->clk = devm_clk_get(&pdev->dev, "dma_clk"); if (IS_ERR(atdma->clk)) return PTR_ERR(atdma->clk); err = clk_prepare_enable(atdma->clk); if (err) - goto err_clk_prepare; + return err; /* force dma off, just in case */ at_dma_off(atdma); @@ -1942,8 +1942,6 @@ static int __init at_dma_probe(struct platform_device *pdev) free_irq(platform_get_irq(pdev, 0), atdma); err_irq: clk_disable_unprepare(atdma->clk); -err_clk_prepare: - clk_put(atdma->clk); return err; } @@ -1973,7 +1971,6 @@ static int at_dma_remove(struct platform_device *pdev) } clk_disable_unprepare(atdma->clk); - clk_put(atdma->clk); return 0; } -- GitLab From c23cd8c971f0b4697f344d981f13aae4123f866d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:59 +0300 Subject: [PATCH 185/694] dmaengine: at_hdmac: Use pm_ptr() Use pm_ptr() macro to fill at_dma_driver.driver.pm. In case CONFIG_PM is not enabled, the macro will return NULL. When NULL, at_dma_dev_pm_ops will end up being unused, so prepend it with the __maybe_unused attribute. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-26-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 444aa7d75ff53..4e3c519e60799 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -2084,7 +2084,7 @@ static int at_dma_resume_noirq(struct device *dev) return 0; } -static const struct dev_pm_ops at_dma_dev_pm_ops = { +static const struct dev_pm_ops __maybe_unused at_dma_dev_pm_ops = { .prepare = at_dma_prepare, .suspend_noirq = at_dma_suspend_noirq, .resume_noirq = at_dma_resume_noirq, @@ -2096,7 +2096,7 @@ static struct platform_driver at_dma_driver = { .id_table = atdma_devtypes, .driver = { .name = "at_hdmac", - .pm = &at_dma_dev_pm_ops, + .pm = pm_ptr(&at_dma_dev_pm_ops), .of_match_table = of_match_ptr(atmel_dma_dt_ids), }, }; -- GitLab From e3e672b8f95be38db26c971bc4c6b43d18a9836a Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:00 +0300 Subject: [PATCH 186/694] dmaengine: at_hdmac: Set include entries in alphabetic order It's a good practice to set the include entries in alphabetic order. It helps humans to read the code easier. Alphabetic order should also prove that each header is self-contained, i.e. can be included without prerequisites. 
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-27-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 4e3c519e60799..a3fa8bffdb749 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -12,16 +12,16 @@ #include #include #include -#include #include +#include #include #include -#include -#include #include #include #include #include +#include +#include #include "at_hdmac_regs.h" #include "dmaengine.h" -- GitLab From 5cecadc3e2a4fb72ab37d9420df0a9e1179b8a3e Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:01 +0300 Subject: [PATCH 187/694] dmaengine: at_hdmac: Keep register definitions and structures private to at_hdmac.c Do not expose register definitions, structures and helpers via a .h file because there are used only by at_hdmac.c. Since there are no other users, remove the ambiguity and move all the .h contents to the .c file. One may notice some checkpatch warnings and errors with this move. The move was done "as it was", checkpatch complaints can be fixed in a further patch. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-28-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- MAINTAINERS | 1 - drivers/dma/at_hdmac.c | 469 ++++++++++++++++++++++++++++++++++- drivers/dma/at_hdmac_regs.h | 478 ------------------------------------ 3 files changed, 468 insertions(+), 480 deletions(-) delete mode 100644 drivers/dma/at_hdmac_regs.h diff --git a/MAINTAINERS b/MAINTAINERS index cd1264d24db85..c1b11b55ea89f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13415,7 +13415,6 @@ L: dmaengine@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/dma/atmel-dma.txt F: drivers/dma/at_hdmac.c -F: drivers/dma/at_hdmac_regs.h F: drivers/dma/at_xdmac.c F: include/dt-bindings/dma/at91.h diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index a3fa8bffdb749..a07e3355f09a0 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -23,7 +23,6 @@ #include #include -#include "at_hdmac_regs.h" #include "dmaengine.h" /* @@ -35,6 +34,474 @@ * atc_ / atchan : ATmel DMA Channel entity related */ +#define AT_DMA_MAX_NR_CHANNELS 8 + + +#define AT_DMA_GCFG 0x00 /* Global Configuration Register */ +#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ +#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. 
*/ +#define AT_DMA_ARB_CFG_FIXED (0x0 << 4) +#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) + +#define AT_DMA_EN 0x04 /* Controller Enable Register */ +#define AT_DMA_ENABLE (0x1 << 0) + +#define AT_DMA_SREQ 0x08 /* Software Single Request Register */ +#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ +#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ + +#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ +#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ +#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ + +#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ +#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ +#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ + +#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ +#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */ + +/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ +#define AT_DMA_EBCIER 0x18 /* Enable register */ +#define AT_DMA_EBCIDR 0x1C /* Disable register */ +#define AT_DMA_EBCIMR 0x20 /* Mask Register */ +#define AT_DMA_EBCISR 0x24 /* Status Register */ +#define AT_DMA_CBTC_OFFSET 8 +#define AT_DMA_ERR_OFFSET 16 +#define AT_DMA_BTC(x) (0x1 << (x)) +#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) +#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) + +#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ +#define AT_DMA_ENA(x) (0x1 << (x)) +#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) +#define AT_DMA_KEEP(x) (0x1 << (24 + (x))) + +#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ +#define AT_DMA_DIS(x) (0x1 << (x)) +#define AT_DMA_RES(x) (0x1 << ( 8 + (x))) + +#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ +#define AT_DMA_EMPT(x) (0x1 << (16 + (x))) +#define AT_DMA_STAL(x) (0x1 << (24 + (x))) + + +#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ +#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ + +/* Hardware register offset for each channel */ +#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ +#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ +#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ +#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ +#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ +#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ +#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ +#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ + + +/* Bitfield definitions */ + +/* Bitfields in DSCR */ +#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ + +/* Bitfields in CTRLA */ +#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ +#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ +#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ +#define ATC_SCSIZE(x) (ATC_SCSIZE_MASK & ((x) << 16)) +#define ATC_SCSIZE_1 (0x0 << 16) +#define ATC_SCSIZE_4 (0x1 << 16) +#define ATC_SCSIZE_8 (0x2 << 16) +#define ATC_SCSIZE_16 (0x3 << 16) +#define ATC_SCSIZE_32 (0x4 << 16) +#define ATC_SCSIZE_64 (0x5 << 16) +#define ATC_SCSIZE_128 (0x6 << 16) +#define ATC_SCSIZE_256 (0x7 << 
16) +#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ +#define ATC_DCSIZE(x) (ATC_DCSIZE_MASK & ((x) << 20)) +#define ATC_DCSIZE_1 (0x0 << 20) +#define ATC_DCSIZE_4 (0x1 << 20) +#define ATC_DCSIZE_8 (0x2 << 20) +#define ATC_DCSIZE_16 (0x3 << 20) +#define ATC_DCSIZE_32 (0x4 << 20) +#define ATC_DCSIZE_64 (0x5 << 20) +#define ATC_DCSIZE_128 (0x6 << 20) +#define ATC_DCSIZE_256 (0x7 << 20) +#define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ +#define ATC_SRC_WIDTH(x) ((x) << 24) +#define ATC_SRC_WIDTH_BYTE (0x0 << 24) +#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) +#define ATC_SRC_WIDTH_WORD (0x2 << 24) +#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) +#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ +#define ATC_DST_WIDTH(x) ((x) << 28) +#define ATC_DST_WIDTH_BYTE (0x0 << 28) +#define ATC_DST_WIDTH_HALFWORD (0x1 << 28) +#define ATC_DST_WIDTH_WORD (0x2 << 28) +#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */ + +/* Bitfields in CTRLB */ +#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ +#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ + /* Specify AHB interfaces */ +#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ +#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ + +#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ +#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ +#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ +#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ +#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ +#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ +#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ +#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ +#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ +#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ +#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ +#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ +#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ +#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) +#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ +#define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ +#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ +#define ATC_DST_ADDR_MODE_MASK (0x3 << 28) +#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ +#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ +#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ +#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ +#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ + +/* Bitfields in CFG */ +#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */ + +#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ +#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ +#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ +#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ +#define ATC_SRC_H2SEL_SW (0x0 << 9) +#define ATC_SRC_H2SEL_HW (0x1 << 9) +#define ATC_SRC_PER_MSB(h) (ATC_PER_MSB(h) << 10) /* Channel src rq (most significant bits) */ +#define 
ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ +#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ +#define ATC_DST_H2SEL_SW (0x0 << 13) +#define ATC_DST_H2SEL_HW (0x1 << 13) +#define ATC_DST_PER_MSB(h) (ATC_PER_MSB(h) << 14) /* Channel dst rq (most significant bits) */ +#define ATC_SOD (0x1 << 16) /* Stop On Done */ +#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ +#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ +#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */ +#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) +#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) +#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ +#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ +#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) +#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) +#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 28) + +/* Bitfields in SPIP */ +#define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) +#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) + +/* Bitfields in DPIP */ +#define ATC_DPIP_HOLE(x) (0xFFFFU & (x)) +#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) + + +/*-- descriptors -----------------------------------------------------*/ + +/* LLI == Linked List Item; aka DMA buffer descriptor */ +struct at_lli { + /* values that are not changed by hardware */ + u32 saddr; + u32 daddr; + /* value that may get written back: */ + u32 ctrla; + /* more values that are not changed by hardware */ + u32 ctrlb; + u32 dscr; /* chain to next lli */ +}; + +/** + * struct at_desc - software descriptor + * @at_lli: hardware lli structure + * @txd: support for the async_tx api + * @desc_node: node on the channed descriptors list + * @len: descriptor byte count + * @total_len: total transaction byte count + */ +struct at_desc { + /* FIRST values the hardware uses */ + struct at_lli lli; + + /* THEN values for driver housekeeping */ + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + struct list_head desc_node; + size_t len; + size_t total_len; + + /* Interleaved data */ + size_t boundary; + size_t dst_hole; + size_t src_hole; + + /* Memset temporary buffer */ + bool memset_buffer; + dma_addr_t memset_paddr; + int *memset_vaddr; +}; + +static inline struct at_desc * +txd_to_at_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct at_desc, txd); +} + + +/*-- Channels --------------------------------------------------------*/ + +/** + * atc_status - information bits stored in channel status flag + * + * Manipulated with atomic operations. 
+ */ +enum atc_status { + ATC_IS_ERROR = 0, + ATC_IS_PAUSED = 1, + ATC_IS_CYCLIC = 24, +}; + +/** + * struct at_dma_chan - internal representation of an Atmel HDMAC channel + * @chan_common: common dmaengine channel object members + * @device: parent device + * @ch_regs: memory mapped register base + * @mask: channel index in a mask + * @per_if: peripheral interface + * @mem_if: memory interface + * @status: transmit status information from irq/prep* functions + * to tasklet (use atomic operations) + * @tasklet: bottom half to finish transaction work + * @save_cfg: configuration register that is saved on suspend/resume cycle + * @save_dscr: for cyclic operations, preserve next descriptor address in + * the cyclic list on suspend/resume cycle + * @dma_sconfig: configuration for slave transfers, passed via + * .device_config + * @lock: serializes enqueue/dequeue operations to descriptors lists + * @active_list: list of descriptors dmaengine is being running on + * @queue: list of descriptors ready to be submitted to engine + * @free_list: list of descriptors usable by the channel + */ +struct at_dma_chan { + struct dma_chan chan_common; + struct at_dma *device; + void __iomem *ch_regs; + u8 mask; + u8 per_if; + u8 mem_if; + unsigned long status; + struct tasklet_struct tasklet; + u32 save_cfg; + u32 save_dscr; + struct dma_slave_config dma_sconfig; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + struct list_head active_list; + struct list_head queue; + struct list_head free_list; +}; + +#define channel_readl(atchan, name) \ + __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) + +#define channel_writel(atchan, name, val) \ + __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) + +static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) +{ + return container_of(dchan, struct at_dma_chan, chan_common); +} + +/* + * Fix sconfig's burst size according to at_hdmac. We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7. + * + * This can be done by finding most significant bit set. + */ +static inline void convert_burst(u32 *maxburst) +{ + if (*maxburst > 1) + *maxburst = fls(*maxburst) - 2; + else + *maxburst = 0; +} + +/* + * Fix sconfig's bus width according to at_hdmac. + * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2. 
+ */ +static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) +{ + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return 1; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return 2; + default: + /* For 1 byte width or fallback */ + return 0; + } +} + +/*-- Controller ------------------------------------------------------*/ + +/** + * struct at_dma - internal representation of an Atmel HDMA Controller + * @chan_common: common dmaengine dma_device object members + * @atdma_devtype: identifier of DMA controller compatibility + * @ch_regs: memory mapped register base + * @clk: dma controller clock + * @save_imr: interrupt mask register that is saved on suspend/resume cycle + * @all_chan_mask: all channels availlable in a mask + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @chan: channels table to store at_dma_chan structures + */ +struct at_dma { + struct dma_device dma_common; + void __iomem *regs; + struct clk *clk; + u32 save_imr; + + u8 all_chan_mask; + + struct dma_pool *dma_desc_pool; + struct dma_pool *memset_pool; + /* AT THE END channels table */ + struct at_dma_chan chan[]; +}; + +#define dma_readl(atdma, name) \ + __raw_readl((atdma)->regs + AT_DMA_##name) +#define dma_writel(atdma, name, val) \ + __raw_writel((val), (atdma)->regs + AT_DMA_##name) + +static inline struct at_dma *to_at_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct at_dma, dma_common); +} + + +/*-- Helper functions ------------------------------------------------*/ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +#if defined(VERBOSE_DEBUG) +static void vdbg_dump_regs(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + dev_err(chan2dev(&atchan->chan_common), + " channel %d : imr = 0x%x, chsr = 0x%x\n", + atchan->chan_common.chan_id, + dma_readl(atdma, EBCIMR), + dma_readl(atdma, CHSR)); + + dev_err(chan2dev(&atchan->chan_common), + " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", + channel_readl(atchan, SADDR), + channel_readl(atchan, DADDR), + channel_readl(atchan, CTRLA), + channel_readl(atchan, CTRLB), + channel_readl(atchan, CFG), + channel_readl(atchan, DSCR)); +} +#else +static void vdbg_dump_regs(struct at_dma_chan *atchan) {} +#endif + +static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) +{ + dev_crit(chan2dev(&atchan->chan_common), + "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", + &lli->saddr, &lli->daddr, + lli->ctrla, lli->ctrlb, &lli->dscr); +} + + +static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on) +{ + u32 ebci; + + /* enable interrupts on buffer transfer completion & error */ + ebci = AT_DMA_BTC(chan_id) + | AT_DMA_ERR(chan_id); + if (on) + dma_writel(atdma, EBCIER, ebci); + else + dma_writel(atdma, EBCIDR, ebci); +} + +static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id) +{ + atc_setup_irq(atdma, chan_id, 1); +} + +static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) +{ + atc_setup_irq(atdma, chan_id, 0); +} + + +/** + * atc_chan_is_enabled - test if given channel is enabled + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) +{ + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + + return !!(dma_readl(atdma, CHSR) & atchan->mask); +} + +/** + * atc_chan_is_paused - test channel pause/resume status + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_paused(struct at_dma_chan *atchan) 
+{ + return test_bit(ATC_IS_PAUSED, &atchan->status); +} + +/** + * atc_chan_is_cyclic - test if given channel has cyclic property set + * @atchan: channel we want to test status + */ +static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan) +{ + return test_bit(ATC_IS_CYCLIC, &atchan->status); +} + +/** + * set_desc_eol - set end-of-link to descriptor so it will end transfer + * @desc: descriptor, signle or at the end of a chain, to end chain on + */ +static void set_desc_eol(struct at_desc *desc) +{ + u32 ctrlb = desc->lli.ctrlb; + + ctrlb &= ~ATC_IEN; + ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + + desc->lli.ctrlb = ctrlb; + desc->lli.dscr = 0; +} + #define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) #define ATC_DEFAULT_CTRLB (ATC_SIF(AT_DMA_MEM_IF) \ |ATC_DIF(AT_DMA_MEM_IF)) diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h deleted file mode 100644 index d4d382d746078..0000000000000 --- a/drivers/dma/at_hdmac_regs.h +++ /dev/null @@ -1,478 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Header file for the Atmel AHB DMA Controller driver - * - * Copyright (C) 2008 Atmel Corporation - */ -#ifndef AT_HDMAC_REGS_H -#define AT_HDMAC_REGS_H - -#define AT_DMA_MAX_NR_CHANNELS 8 - - -#define AT_DMA_GCFG 0x00 /* Global Configuration Register */ -#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ -#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. */ -#define AT_DMA_ARB_CFG_FIXED (0x0 << 4) -#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) - -#define AT_DMA_EN 0x04 /* Controller Enable Register */ -#define AT_DMA_ENABLE (0x1 << 0) - -#define AT_DMA_SREQ 0x08 /* Software Single Request Register */ -#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ -#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ - -#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ -#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ -#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ - -#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ -#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ -#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ - -#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ -#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */ - -/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ -#define AT_DMA_EBCIER 0x18 /* Enable register */ -#define AT_DMA_EBCIDR 0x1C /* Disable register */ -#define AT_DMA_EBCIMR 0x20 /* Mask Register */ -#define AT_DMA_EBCISR 0x24 /* Status Register */ -#define AT_DMA_CBTC_OFFSET 8 -#define AT_DMA_ERR_OFFSET 16 -#define AT_DMA_BTC(x) (0x1 << (x)) -#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) -#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) - -#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ -#define AT_DMA_ENA(x) (0x1 << (x)) -#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) -#define AT_DMA_KEEP(x) (0x1 << (24 + (x))) - -#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ -#define AT_DMA_DIS(x) (0x1 << (x)) -#define AT_DMA_RES(x) (0x1 << ( 8 + (x))) - -#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ -#define AT_DMA_EMPT(x) (0x1 << (16 + (x))) 
-#define AT_DMA_STAL(x) (0x1 << (24 + (x))) - - -#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ -#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ - -/* Hardware register offset for each channel */ -#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ -#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ -#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ -#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ -#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ -#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ -#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ -#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ - - -/* Bitfield definitions */ - -/* Bitfields in DSCR */ -#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ - -/* Bitfields in CTRLA */ -#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ -#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ -#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ -#define ATC_SCSIZE(x) (ATC_SCSIZE_MASK & ((x) << 16)) -#define ATC_SCSIZE_1 (0x0 << 16) -#define ATC_SCSIZE_4 (0x1 << 16) -#define ATC_SCSIZE_8 (0x2 << 16) -#define ATC_SCSIZE_16 (0x3 << 16) -#define ATC_SCSIZE_32 (0x4 << 16) -#define ATC_SCSIZE_64 (0x5 << 16) -#define ATC_SCSIZE_128 (0x6 << 16) -#define ATC_SCSIZE_256 (0x7 << 16) -#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ -#define ATC_DCSIZE(x) (ATC_DCSIZE_MASK & ((x) << 20)) -#define ATC_DCSIZE_1 (0x0 << 20) -#define ATC_DCSIZE_4 (0x1 << 20) -#define ATC_DCSIZE_8 (0x2 << 20) -#define ATC_DCSIZE_16 (0x3 << 20) -#define ATC_DCSIZE_32 (0x4 << 20) -#define ATC_DCSIZE_64 (0x5 << 20) -#define ATC_DCSIZE_128 (0x6 << 20) -#define ATC_DCSIZE_256 (0x7 << 20) -#define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ -#define ATC_SRC_WIDTH(x) ((x) << 24) -#define ATC_SRC_WIDTH_BYTE (0x0 << 24) -#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) -#define ATC_SRC_WIDTH_WORD (0x2 << 24) -#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) -#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ -#define ATC_DST_WIDTH(x) ((x) << 28) -#define ATC_DST_WIDTH_BYTE (0x0 << 28) -#define ATC_DST_WIDTH_HALFWORD (0x1 << 28) -#define ATC_DST_WIDTH_WORD (0x2 << 28) -#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */ - -/* Bitfields in CTRLB */ -#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ -#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ - /* Specify AHB interfaces */ -#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ -#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ - -#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ -#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ -#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ -#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ -#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ -#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ -#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ -#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ -#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ -#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ -#define ATC_FC_MEM2PER_PER (0x5 << 21) /* 
Mem-to-Periph (Peripheral) */ -#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ -#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ -#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) -#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ -#define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ -#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ -#define ATC_DST_ADDR_MODE_MASK (0x3 << 28) -#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ -#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ -#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ -#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ -#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ - -/* Bitfields in CFG */ -#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */ - -#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ -#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ -#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ -#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ -#define ATC_SRC_H2SEL_SW (0x0 << 9) -#define ATC_SRC_H2SEL_HW (0x1 << 9) -#define ATC_SRC_PER_MSB(h) (ATC_PER_MSB(h) << 10) /* Channel src rq (most significant bits) */ -#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ -#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ -#define ATC_DST_H2SEL_SW (0x0 << 13) -#define ATC_DST_H2SEL_HW (0x1 << 13) -#define ATC_DST_PER_MSB(h) (ATC_PER_MSB(h) << 14) /* Channel dst rq (most significant bits) */ -#define ATC_SOD (0x1 << 16) /* Stop On Done */ -#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ -#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ -#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */ -#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) -#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) -#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ -#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ -#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) -#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) -#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 28) - -/* Bitfields in SPIP */ -#define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) -#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) - -/* Bitfields in DPIP */ -#define ATC_DPIP_HOLE(x) (0xFFFFU & (x)) -#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) - - -/*-- descriptors -----------------------------------------------------*/ - -/* LLI == Linked List Item; aka DMA buffer descriptor */ -struct at_lli { - /* values that are not changed by hardware */ - u32 saddr; - u32 daddr; - /* value that may get written back: */ - u32 ctrla; - /* more values that are not changed by hardware */ - u32 ctrlb; - u32 dscr; /* chain to next lli */ -}; - -/** - * struct at_desc - software descriptor - * @at_lli: hardware lli structure - * @txd: support for the async_tx api - * @desc_node: node on the channed descriptors list - * @len: descriptor byte count - * @total_len: total transaction byte count - */ -struct at_desc { - /* FIRST values the hardware uses */ - struct at_lli lli; - - /* THEN values for driver housekeeping */ - struct list_head tx_list; - struct dma_async_tx_descriptor txd; - struct list_head desc_node; - size_t len; - size_t total_len; - - /* Interleaved data */ - size_t boundary; - size_t dst_hole; - size_t src_hole; - - /* 
Memset temporary buffer */ - bool memset_buffer; - dma_addr_t memset_paddr; - int *memset_vaddr; -}; - -static inline struct at_desc * -txd_to_at_desc(struct dma_async_tx_descriptor *txd) -{ - return container_of(txd, struct at_desc, txd); -} - - -/*-- Channels --------------------------------------------------------*/ - -/** - * atc_status - information bits stored in channel status flag - * - * Manipulated with atomic operations. - */ -enum atc_status { - ATC_IS_ERROR = 0, - ATC_IS_PAUSED = 1, - ATC_IS_CYCLIC = 24, -}; - -/** - * struct at_dma_chan - internal representation of an Atmel HDMAC channel - * @chan_common: common dmaengine channel object members - * @device: parent device - * @ch_regs: memory mapped register base - * @mask: channel index in a mask - * @per_if: peripheral interface - * @mem_if: memory interface - * @status: transmit status information from irq/prep* functions - * to tasklet (use atomic operations) - * @tasklet: bottom half to finish transaction work - * @save_cfg: configuration register that is saved on suspend/resume cycle - * @save_dscr: for cyclic operations, preserve next descriptor address in - * the cyclic list on suspend/resume cycle - * @dma_sconfig: configuration for slave transfers, passed via - * .device_config - * @lock: serializes enqueue/dequeue operations to descriptors lists - * @active_list: list of descriptors dmaengine is being running on - * @queue: list of descriptors ready to be submitted to engine - * @free_list: list of descriptors usable by the channel - */ -struct at_dma_chan { - struct dma_chan chan_common; - struct at_dma *device; - void __iomem *ch_regs; - u8 mask; - u8 per_if; - u8 mem_if; - unsigned long status; - struct tasklet_struct tasklet; - u32 save_cfg; - u32 save_dscr; - struct dma_slave_config dma_sconfig; - - spinlock_t lock; - - /* these other elements are all protected by lock */ - struct list_head active_list; - struct list_head queue; - struct list_head free_list; -}; - -#define channel_readl(atchan, name) \ - __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET) - -#define channel_writel(atchan, name, val) \ - __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) - -static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) -{ - return container_of(dchan, struct at_dma_chan, chan_common); -} - -/* - * Fix sconfig's burst size according to at_hdmac. We need to convert them as: - * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7. - * - * This can be done by finding most significant bit set. - */ -static inline void convert_burst(u32 *maxburst) -{ - if (*maxburst > 1) - *maxburst = fls(*maxburst) - 2; - else - *maxburst = 0; -} - -/* - * Fix sconfig's bus width according to at_hdmac. - * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2. 
- */ -static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) -{ - switch (addr_width) { - case DMA_SLAVE_BUSWIDTH_2_BYTES: - return 1; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - return 2; - default: - /* For 1 byte width or fallback */ - return 0; - } -} - -/*-- Controller ------------------------------------------------------*/ - -/** - * struct at_dma - internal representation of an Atmel HDMA Controller - * @chan_common: common dmaengine dma_device object members - * @atdma_devtype: identifier of DMA controller compatibility - * @ch_regs: memory mapped register base - * @clk: dma controller clock - * @save_imr: interrupt mask register that is saved on suspend/resume cycle - * @all_chan_mask: all channels availlable in a mask - * @dma_desc_pool: base of DMA descriptor region (DMA address) - * @chan: channels table to store at_dma_chan structures - */ -struct at_dma { - struct dma_device dma_common; - void __iomem *regs; - struct clk *clk; - u32 save_imr; - - u8 all_chan_mask; - - struct dma_pool *dma_desc_pool; - struct dma_pool *memset_pool; - /* AT THE END channels table */ - struct at_dma_chan chan[]; -}; - -#define dma_readl(atdma, name) \ - __raw_readl((atdma)->regs + AT_DMA_##name) -#define dma_writel(atdma, name, val) \ - __raw_writel((val), (atdma)->regs + AT_DMA_##name) - -static inline struct at_dma *to_at_dma(struct dma_device *ddev) -{ - return container_of(ddev, struct at_dma, dma_common); -} - - -/*-- Helper functions ------------------------------------------------*/ - -static struct device *chan2dev(struct dma_chan *chan) -{ - return &chan->dev->device; -} - -#if defined(VERBOSE_DEBUG) -static void vdbg_dump_regs(struct at_dma_chan *atchan) -{ - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); - - dev_err(chan2dev(&atchan->chan_common), - " channel %d : imr = 0x%x, chsr = 0x%x\n", - atchan->chan_common.chan_id, - dma_readl(atdma, EBCIMR), - dma_readl(atdma, CHSR)); - - dev_err(chan2dev(&atchan->chan_common), - " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", - channel_readl(atchan, SADDR), - channel_readl(atchan, DADDR), - channel_readl(atchan, CTRLA), - channel_readl(atchan, CTRLB), - channel_readl(atchan, CFG), - channel_readl(atchan, DSCR)); -} -#else -static void vdbg_dump_regs(struct at_dma_chan *atchan) {} -#endif - -static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) -{ - dev_crit(chan2dev(&atchan->chan_common), - "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", - &lli->saddr, &lli->daddr, - lli->ctrla, lli->ctrlb, &lli->dscr); -} - - -static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on) -{ - u32 ebci; - - /* enable interrupts on buffer transfer completion & error */ - ebci = AT_DMA_BTC(chan_id) - | AT_DMA_ERR(chan_id); - if (on) - dma_writel(atdma, EBCIER, ebci); - else - dma_writel(atdma, EBCIDR, ebci); -} - -static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id) -{ - atc_setup_irq(atdma, chan_id, 1); -} - -static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) -{ - atc_setup_irq(atdma, chan_id, 0); -} - - -/** - * atc_chan_is_enabled - test if given channel is enabled - * @atchan: channel we want to test status - */ -static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) -{ - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); - - return !!(dma_readl(atdma, CHSR) & atchan->mask); -} - -/** - * atc_chan_is_paused - test channel pause/resume status - * @atchan: channel we want to test status - */ -static inline int atc_chan_is_paused(struct at_dma_chan *atchan) 
-{ - return test_bit(ATC_IS_PAUSED, &atchan->status); -} - -/** - * atc_chan_is_cyclic - test if given channel has cyclic property set - * @atchan: channel we want to test status - */ -static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan) -{ - return test_bit(ATC_IS_CYCLIC, &atchan->status); -} - -/** - * set_desc_eol - set end-of-link to descriptor so it will end transfer - * @desc: descriptor, signle or at the end of a chain, to end chain on - */ -static void set_desc_eol(struct at_desc *desc) -{ - u32 ctrlb = desc->lli.ctrlb; - - ctrlb &= ~ATC_IEN; - ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; - - desc->lli.ctrlb = ctrlb; - desc->lli.dscr = 0; -} - -#endif /* AT_HDMAC_REGS_H */ -- GitLab From d8840a7edcf0aa840e175af17d61476a7dbc65f7 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:02 +0300 Subject: [PATCH 188/694] dmaengine: at_hdmac: Use bitfield access macros Use the bitfield access macros in order to clean and to make the driver easier to read. One will see some "line length exceeds 100 columns" checkpatch warnings. I chose to not introduce new lines for regs descriptions in order to preserve the style of the comments throughout the definitions. Style can be fixed in a further patch. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-29-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 408 ++++++++++++++++++++--------------------- 1 file changed, 198 insertions(+), 210 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index a07e3355f09a0..edec6a8c730c0 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -36,176 +37,153 @@ #define AT_DMA_MAX_NR_CHANNELS 8 +/* Global Configuration Register */ +#define AT_DMA_GCFG 0x00 +#define AT_DMA_IF_BIGEND(i) BIT((i)) /* AHB-Lite Interface i in Big-endian mode */ +#define AT_DMA_ARB_CFG BIT(4) /* Arbiter mode. */ -#define AT_DMA_GCFG 0x00 /* Global Configuration Register */ -#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */ -#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. 
*/ -#define AT_DMA_ARB_CFG_FIXED (0x0 << 4) -#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4) +/* Controller Enable Register */ +#define AT_DMA_EN 0x04 +#define AT_DMA_ENABLE BIT(0) -#define AT_DMA_EN 0x04 /* Controller Enable Register */ -#define AT_DMA_ENABLE (0x1 << 0) +/* Software Single Request Register */ +#define AT_DMA_SREQ 0x08 +#define AT_DMA_SSREQ(x) BIT((x) << 1) /* Request a source single transfer on channel x */ +#define AT_DMA_DSREQ(x) BIT(1 + ((x) << 1)) /* Request a destination single transfer on channel x */ -#define AT_DMA_SREQ 0x08 /* Software Single Request Register */ -#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */ -#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */ +/* Software Chunk Transfer Request Register */ +#define AT_DMA_CREQ 0x0c +#define AT_DMA_SCREQ(x) BIT((x) << 1) /* Request a source chunk transfer on channel x */ +#define AT_DMA_DCREQ(x) BIT(1 + ((x) << 1)) /* Request a destination chunk transfer on channel x */ -#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */ -#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */ -#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */ +/* Software Last Transfer Flag Register */ +#define AT_DMA_LAST 0x10 +#define AT_DMA_SLAST(x) BIT((x) << 1) /* This src rq is last tx of buffer on channel x */ +#define AT_DMA_DLAST(x) BIT(1 + ((x) << 1)) /* This dst rq is last tx of buffer on channel x */ -#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */ -#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */ -#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */ - -#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */ -#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */ +/* Request Synchronization Register */ +#define AT_DMA_SYNC 0x14 +#define AT_DMA_SYR(h) BIT((h)) /* Synchronize handshake line h */ /* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */ -#define AT_DMA_EBCIER 0x18 /* Enable register */ -#define AT_DMA_EBCIDR 0x1C /* Disable register */ -#define AT_DMA_EBCIMR 0x20 /* Mask Register */ -#define AT_DMA_EBCISR 0x24 /* Status Register */ -#define AT_DMA_CBTC_OFFSET 8 -#define AT_DMA_ERR_OFFSET 16 -#define AT_DMA_BTC(x) (0x1 << (x)) -#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x))) -#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x))) - -#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */ -#define AT_DMA_ENA(x) (0x1 << (x)) -#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x))) -#define AT_DMA_KEEP(x) (0x1 << (24 + (x))) - -#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */ -#define AT_DMA_DIS(x) (0x1 << (x)) -#define AT_DMA_RES(x) (0x1 << ( 8 + (x))) - -#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */ -#define AT_DMA_EMPT(x) (0x1 << (16 + (x))) -#define AT_DMA_STAL(x) (0x1 << (24 + (x))) - - -#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */ -#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ +#define AT_DMA_EBCIER 0x18 /* Enable register */ +#define AT_DMA_EBCIDR 0x1c /* Disable register */ +#define AT_DMA_EBCIMR 0x20 /* Mask Register */ +#define AT_DMA_EBCISR 0x24 /* Status Register */ +#define AT_DMA_CBTC_OFFSET 8 +#define AT_DMA_ERR_OFFSET 16 
+#define AT_DMA_BTC(x) BIT((x)) +#define AT_DMA_CBTC(x) BIT(AT_DMA_CBTC_OFFSET + (x)) +#define AT_DMA_ERR(x) BIT(AT_DMA_ERR_OFFSET + (x)) + +/* Channel Handler Enable Register */ +#define AT_DMA_CHER 0x28 +#define AT_DMA_ENA(x) BIT((x)) +#define AT_DMA_SUSP(x) BIT(8 + (x)) +#define AT_DMA_KEEP(x) BIT(24 + (x)) + +/* Channel Handler Disable Register */ +#define AT_DMA_CHDR 0x2c +#define AT_DMA_DIS(x) BIT(x) +#define AT_DMA_RES(x) BIT(8 + (x)) + +/* Channel Handler Status Register */ +#define AT_DMA_CHSR 0x30 +#define AT_DMA_EMPT(x) BIT(16 + (x)) +#define AT_DMA_STAL(x) BIT(24 + (x)) + +/* Channel registers base address */ +#define AT_DMA_CH_REGS_BASE 0x3c +#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */ /* Hardware register offset for each channel */ -#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ -#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ -#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ -#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */ -#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ -#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ -#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ -#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */ +#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */ +#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */ +#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */ +#define ATC_CTRLA_OFFSET 0x0c /* Control A Register */ +#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */ +#define ATC_CFG_OFFSET 0x14 /* Configuration Register */ +#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */ +#define ATC_DPIP_OFFSET 0x1c /* Dst PIP Configuration Register */ /* Bitfield definitions */ /* Bitfields in DSCR */ -#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */ +#define ATC_DSCR_IF GENMASK(1, 0) /* Dsc feched via AHB-Lite Interface */ /* Bitfields in CTRLA */ -#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */ -#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */ -#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */ -#define ATC_SCSIZE(x) (ATC_SCSIZE_MASK & ((x) << 16)) -#define ATC_SCSIZE_1 (0x0 << 16) -#define ATC_SCSIZE_4 (0x1 << 16) -#define ATC_SCSIZE_8 (0x2 << 16) -#define ATC_SCSIZE_16 (0x3 << 16) -#define ATC_SCSIZE_32 (0x4 << 16) -#define ATC_SCSIZE_64 (0x5 << 16) -#define ATC_SCSIZE_128 (0x6 << 16) -#define ATC_SCSIZE_256 (0x7 << 16) -#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */ -#define ATC_DCSIZE(x) (ATC_DCSIZE_MASK & ((x) << 20)) -#define ATC_DCSIZE_1 (0x0 << 20) -#define ATC_DCSIZE_4 (0x1 << 20) -#define ATC_DCSIZE_8 (0x2 << 20) -#define ATC_DCSIZE_16 (0x3 << 20) -#define ATC_DCSIZE_32 (0x4 << 20) -#define ATC_DCSIZE_64 (0x5 << 20) -#define ATC_DCSIZE_128 (0x6 << 20) -#define ATC_DCSIZE_256 (0x7 << 20) -#define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */ -#define ATC_SRC_WIDTH(x) ((x) << 24) -#define ATC_SRC_WIDTH_BYTE (0x0 << 24) -#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) -#define ATC_SRC_WIDTH_WORD (0x2 << 24) -#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) -#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ -#define ATC_DST_WIDTH(x) ((x) << 28) -#define ATC_DST_WIDTH_BYTE (0x0 << 28) -#define ATC_DST_WIDTH_HALFWORD (0x1 << 28) -#define ATC_DST_WIDTH_WORD (0x2 << 28) -#define ATC_DONE (0x1 << 31) /* Tx Done (only written back 
in descriptor) */ +#define ATC_BTSIZE_MAX GENMASK(15, 0) /* Maximum Buffer Transfer Size */ +#define ATC_BTSIZE GENMASK(15, 0) /* Buffer Transfer Size */ +#define ATC_SCSIZE GENMASK(18, 16) /* Source Chunk Transfer Size */ +#define ATC_DCSIZE GENMASK(22, 20) /* Destination Chunk Transfer Size */ +#define ATC_SRC_WIDTH GENMASK(25, 24) /* Source Single Transfer Size */ +#define ATC_DST_WIDTH GENMASK(29, 28) /* Destination Single Transfer Size */ +#define ATC_DONE BIT(31) /* Tx Done (only written back in descriptor) */ /* Bitfields in CTRLB */ -#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ -#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ - /* Specify AHB interfaces */ -#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ -#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ - -#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ -#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ -#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ -#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */ -#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */ -#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */ -#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */ -#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */ -#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */ -#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */ -#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */ -#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */ -#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */ -#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24) -#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */ -#define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */ -#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */ -#define ATC_DST_ADDR_MODE_MASK (0x3 << 28) -#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */ -#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */ -#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */ -#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */ -#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */ +#define ATC_SIF GENMASK(1, 0) /* Src tx done via AHB-Lite Interface i */ +#define ATC_DIF GENMASK(5, 4) /* Dst tx done via AHB-Lite Interface i */ +#define AT_DMA_MEM_IF 0x0 /* interface 0 as memory interface */ +#define AT_DMA_PER_IF 0x1 /* interface 1 as peripheral interface */ +#define ATC_SRC_PIP BIT(8) /* Source Picture-in-Picture enabled */ +#define ATC_DST_PIP BIT(12) /* Destination Picture-in-Picture enabled */ +#define ATC_SRC_DSCR_DIS BIT(16) /* Src Descriptor fetch disable */ +#define ATC_DST_DSCR_DIS BIT(20) /* Dst Descriptor fetch disable */ +#define ATC_FC GENMASK(22, 21) /* Choose Flow Controller */ +#define ATC_FC_MEM2MEM 0x0 /* Mem-to-Mem (DMA) */ +#define ATC_FC_MEM2PER 0x1 /* Mem-to-Periph (DMA) */ +#define ATC_FC_PER2MEM 0x2 /* Periph-to-Mem (DMA) */ +#define ATC_FC_PER2PER 0x3 /* Periph-to-Periph (DMA) */ +#define ATC_FC_PER2MEM_PER 0x4 /* Periph-to-Mem (Peripheral) */ +#define ATC_FC_MEM2PER_PER 0x5 /* Mem-to-Periph (Peripheral) */ +#define ATC_FC_PER2PER_SRCPER 0x6 /* Periph-to-Periph (Src Peripheral) */ +#define ATC_FC_PER2PER_DSTPER 0x7 /* Periph-to-Periph (Dst Peripheral) */ +#define ATC_SRC_ADDR_MODE 
GENMASK(25, 24) +#define ATC_SRC_ADDR_MODE_INCR 0x0 /* Incrementing Mode */ +#define ATC_SRC_ADDR_MODE_DECR 0x1 /* Decrementing Mode */ +#define ATC_SRC_ADDR_MODE_FIXED 0x2 /* Fixed Mode */ +#define ATC_DST_ADDR_MODE GENMASK(29, 28) +#define ATC_DST_ADDR_MODE_INCR 0x0 /* Incrementing Mode */ +#define ATC_DST_ADDR_MODE_DECR 0x1 /* Decrementing Mode */ +#define ATC_DST_ADDR_MODE_FIXED 0x2 /* Fixed Mode */ +#define ATC_IEN BIT(30) /* BTC interrupt enable (active low) */ +#define ATC_AUTO BIT(31) /* Auto multiple buffer tx enable */ /* Bitfields in CFG */ #define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */ -#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */ -#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */ -#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */ -#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */ -#define ATC_SRC_H2SEL_SW (0x0 << 9) -#define ATC_SRC_H2SEL_HW (0x1 << 9) -#define ATC_SRC_PER_MSB(h) (ATC_PER_MSB(h) << 10) /* Channel src rq (most significant bits) */ -#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */ -#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */ -#define ATC_DST_H2SEL_SW (0x0 << 13) -#define ATC_DST_H2SEL_HW (0x1 << 13) -#define ATC_DST_PER_MSB(h) (ATC_PER_MSB(h) << 14) /* Channel dst rq (most significant bits) */ -#define ATC_SOD (0x1 << 16) /* Stop On Done */ -#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */ -#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */ -#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */ -#define ATC_LOCK_IF_L_CHUNK (0x0 << 22) -#define ATC_LOCK_IF_L_BUFFER (0x1 << 22) -#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */ -#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */ -#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28) -#define ATC_FIFOCFG_HALFFIFO (0x1 << 28) -#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 28) +#define ATC_SRC_PER GENMASK(3, 0) /* Channel src rq associated with periph handshaking ifc h */ +#define ATC_DST_PER GENMASK(7, 4) /* Channel dst rq associated with periph handshaking ifc h */ +#define ATC_SRC_REP BIT(8) /* Source Replay Mod */ +#define ATC_SRC_H2SEL BIT(9) /* Source Handshaking Mod */ +#define ATC_SRC_PER_MSB GENMASK(11, 10) /* Channel src rq (most significant bits) */ +#define ATC_DST_REP BIT(12) /* Destination Replay Mod */ +#define ATC_DST_H2SEL BIT(13) /* Destination Handshaking Mod */ +#define ATC_DST_PER_MSB GENMASK(15, 14) /* Channel dst rq (most significant bits) */ +#define ATC_SOD BIT(16) /* Stop On Done */ +#define ATC_LOCK_IF BIT(20) /* Interface Lock */ +#define ATC_LOCK_B BIT(21) /* AHB Bus Lock */ +#define ATC_LOCK_IF_L BIT(22) /* Master Interface Arbiter Lock */ +#define ATC_AHB_PROT GENMASK(26, 24) /* AHB Protection */ +#define ATC_FIFOCFG GENMASK(29, 28) /* FIFO Request Configuration */ +#define ATC_FIFOCFG_LARGESTBURST 0x0 +#define ATC_FIFOCFG_HALFFIFO 0x1 +#define ATC_FIFOCFG_ENOUGHSPACE 0x2 /* Bitfields in SPIP */ -#define ATC_SPIP_HOLE(x) (0xFFFFU & (x)) -#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) +#define ATC_SPIP_HOLE GENMASK(15, 0) +#define ATC_SPIP_BOUNDARY GENMASK(25, 16) /* Bitfields in DPIP */ -#define ATC_DPIP_HOLE(x) (0xFFFFU & (x)) -#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16) +#define ATC_DPIP_HOLE GENMASK(15, 0) +#define ATC_DPIP_BOUNDARY GENMASK(25, 16) + +#define ATC_SRC_PER_ID(id) (FIELD_PREP(ATC_SRC_PER_MSB, (id)) | \ + 
FIELD_PREP(ATC_SRC_PER, (id))) +#define ATC_DST_PER_ID(id) (FIELD_PREP(ATC_DST_PER_MSB, (id)) | \ + FIELD_PREP(ATC_DST_PER, (id))) + /*-- descriptors -----------------------------------------------------*/ @@ -502,9 +480,9 @@ static void set_desc_eol(struct at_desc *desc) desc->lli.dscr = 0; } -#define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) -#define ATC_DEFAULT_CTRLB (ATC_SIF(AT_DMA_MEM_IF) \ - |ATC_DIF(AT_DMA_MEM_IF)) +#define ATC_DEFAULT_CFG FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO) +#define ATC_DEFAULT_CTRLB (FIELD_PREP(ATC_SIF, AT_DMA_MEM_IF) | \ + FIELD_PREP(ATC_DIF, AT_DMA_MEM_IF)) #define ATC_DMA_BUSWIDTHS\ (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\ @@ -720,10 +698,12 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); channel_writel(atchan, DSCR, first->txd.phys); - channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) | - ATC_SPIP_BOUNDARY(first->boundary)); - channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | - ATC_DPIP_BOUNDARY(first->boundary)); + channel_writel(atchan, SPIP, FIELD_PREP(ATC_SPIP_HOLE, + first->src_hole) | + FIELD_PREP(ATC_SPIP_BOUNDARY, first->boundary)); + channel_writel(atchan, DPIP, FIELD_PREP(ATC_DPIP_HOLE, + first->dst_hole) | + FIELD_PREP(ATC_DPIP_BOUNDARY, first->boundary)); /* Don't allow CPU to reorder channel enable. */ wmb(); dma_writel(atdma, CHER, atchan->mask); @@ -763,8 +743,8 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, */ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) { - u32 btsize = (ctrla & ATC_BTSIZE_MAX); - u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); + u32 btsize = FIELD_GET(ATC_BTSIZE, ctrla); + u32 src_width = FIELD_GET(ATC_SRC_WIDTH, ctrla); /* * According to the datasheet, when reading the Control A Register @@ -1203,15 +1183,14 @@ atc_prep_dma_interleaved(struct dma_chan *chan, return NULL; } - ctrla = ATC_SRC_WIDTH(dwidth) | - ATC_DST_WIDTH(dwidth); + ctrla = FIELD_PREP(ATC_SRC_WIDTH, dwidth) | + FIELD_PREP(ATC_DST_WIDTH, dwidth); - ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN - | ATC_SRC_ADDR_MODE_INCR - | ATC_DST_ADDR_MODE_INCR - | ATC_SRC_PIP - | ATC_DST_PIP - | ATC_FC_MEM2MEM; + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + ATC_SRC_PIP | ATC_DST_PIP | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); /* create the transfer */ desc = atc_desc_get(atchan); @@ -1272,10 +1251,10 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, return NULL; } - ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN - | ATC_SRC_ADDR_MODE_INCR - | ATC_DST_ADDR_MODE_INCR - | ATC_FC_MEM2MEM; + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); /* * We can be a lot more clever here, but this should take care @@ -1283,8 +1262,8 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, */ src_width = dst_width = atc_get_xfer_width(src, dest, len); - ctrla = ATC_SRC_WIDTH(src_width) | - ATC_DST_WIDTH(dst_width); + ctrla = FIELD_PREP(ATC_SRC_WIDTH, src_width) | + FIELD_PREP(ATC_DST_WIDTH, dst_width); for (offset = 0; offset < len; offset += xfer_count << src_width) { xfer_count = min_t(size_t, (len - offset) >> src_width, @@ -1330,11 +1309,11 @@ static struct at_desc *atc_create_memset_desc(struct dma_chan 
*chan, struct at_desc *desc; size_t xfer_count; - u32 ctrla = ATC_SRC_WIDTH(2) | ATC_DST_WIDTH(2); + u32 ctrla = FIELD_PREP(ATC_SRC_WIDTH, 2) | FIELD_PREP(ATC_DST_WIDTH, 2); u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | - ATC_SRC_ADDR_MODE_FIXED | - ATC_DST_ADDR_MODE_INCR | - ATC_FC_MEM2MEM; + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); xfer_count = len >> 2; if (xfer_count > ATC_BTSIZE_MAX) { @@ -1549,18 +1528,20 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, return NULL; } - ctrla = ATC_SCSIZE(sconfig->src_maxburst) - | ATC_DCSIZE(sconfig->dst_maxburst); + ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | + FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst); ctrlb = ATC_IEN; switch (direction) { case DMA_MEM_TO_DEV: reg_width = convert_buswidth(sconfig->dst_addr_width); - ctrla |= ATC_DST_WIDTH(reg_width); - ctrlb |= ATC_DST_ADDR_MODE_FIXED - | ATC_SRC_ADDR_MODE_INCR - | ATC_FC_MEM2PER - | ATC_SIF(atchan->mem_if) | ATC_DIF(atchan->per_if); + ctrla |= FIELD_PREP(ATC_DST_WIDTH, reg_width); + ctrlb |= FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | + FIELD_PREP(ATC_SIF, atchan->mem_if) | + FIELD_PREP(ATC_DIF, atchan->per_if); reg = sconfig->dst_addr; for_each_sg(sgl, sg, sg_len, i) { struct at_desc *desc; @@ -1584,9 +1565,9 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lli.saddr = mem; desc->lli.daddr = reg; - desc->lli.ctrla = ctrla - | ATC_SRC_WIDTH(mem_width) - | len >> mem_width; + desc->lli.ctrla = ctrla | + FIELD_PREP(ATC_SRC_WIDTH, mem_width) | + len >> mem_width; desc->lli.ctrlb = ctrlb; desc->len = len; @@ -1596,11 +1577,13 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, break; case DMA_DEV_TO_MEM: reg_width = convert_buswidth(sconfig->src_addr_width); - ctrla |= ATC_SRC_WIDTH(reg_width); - ctrlb |= ATC_DST_ADDR_MODE_INCR - | ATC_SRC_ADDR_MODE_FIXED - | ATC_FC_PER2MEM - | ATC_SIF(atchan->per_if) | ATC_DIF(atchan->mem_if); + ctrla |= FIELD_PREP(ATC_SRC_WIDTH, reg_width); + ctrlb |= FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | + FIELD_PREP(ATC_SIF, atchan->per_if) | + FIELD_PREP(ATC_DIF, atchan->mem_if); reg = sconfig->src_addr; for_each_sg(sgl, sg, sg_len, i) { @@ -1625,9 +1608,9 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lli.saddr = reg; desc->lli.daddr = mem; - desc->lli.ctrla = ctrla - | ATC_DST_WIDTH(mem_width) - | len >> reg_width; + desc->lli.ctrla = ctrla | + FIELD_PREP(ATC_DST_WIDTH, mem_width) | + len >> reg_width; desc->lli.ctrlb = ctrlb; desc->len = len; @@ -1693,22 +1676,24 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, u32 ctrla; /* prepare common CRTLA value */ - ctrla = ATC_SCSIZE(sconfig->src_maxburst) - | ATC_DCSIZE(sconfig->dst_maxburst) - | ATC_DST_WIDTH(reg_width) - | ATC_SRC_WIDTH(reg_width) - | period_len >> reg_width; + ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | + FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) | + FIELD_PREP(ATC_DST_WIDTH, reg_width) | + FIELD_PREP(ATC_SRC_WIDTH, reg_width) | + period_len >> reg_width; switch (direction) { case DMA_MEM_TO_DEV: desc->lli.saddr = buf_addr + (period_len * period_index); desc->lli.daddr = sconfig->dst_addr; desc->lli.ctrla = ctrla; - desc->lli.ctrlb = 
ATC_DST_ADDR_MODE_FIXED - | ATC_SRC_ADDR_MODE_INCR - | ATC_FC_MEM2PER - | ATC_SIF(atchan->mem_if) - | ATC_DIF(atchan->per_if); + desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | + FIELD_PREP(ATC_SIF, atchan->mem_if) | + FIELD_PREP(ATC_DIF, atchan->per_if); desc->len = period_len; break; @@ -1716,11 +1701,13 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, desc->lli.saddr = sconfig->src_addr; desc->lli.daddr = buf_addr + (period_len * period_index); desc->lli.ctrla = ctrla; - desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR - | ATC_SRC_ADDR_MODE_FIXED - | ATC_FC_PER2MEM - | ATC_SIF(atchan->per_if) - | ATC_DIF(atchan->mem_if); + desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | + FIELD_PREP(ATC_SIF, atchan->per_if) | + FIELD_PREP(ATC_DIF, atchan->mem_if); desc->len = period_len; break; @@ -2115,14 +2102,13 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, return NULL; } - atslave->cfg = ATC_DST_H2SEL_HW | ATC_SRC_H2SEL_HW; + atslave->cfg = ATC_DST_H2SEL | ATC_SRC_H2SEL; /* * We can fill both SRC_PER and DST_PER, one of these fields will be * ignored depending on DMA transfer direction. */ per_id = dma_spec->args[1] & AT91_DMA_CFG_PER_ID_MASK; - atslave->cfg |= ATC_DST_PER_MSB(per_id) | ATC_DST_PER(per_id) - | ATC_SRC_PER_MSB(per_id) | ATC_SRC_PER(per_id); + atslave->cfg |= ATC_DST_PER_ID(per_id) | ATC_SRC_PER_ID(per_id); /* * We have to translate the value we get from the device tree since * the half FIFO configuration value had to be 0 to keep backward @@ -2130,14 +2116,16 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, */ switch (dma_spec->args[1] & AT91_DMA_CFG_FIFOCFG_MASK) { case AT91_DMA_CFG_FIFOCFG_ALAP: - atslave->cfg |= ATC_FIFOCFG_LARGESTBURST; + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, + ATC_FIFOCFG_LARGESTBURST); break; case AT91_DMA_CFG_FIFOCFG_ASAP: - atslave->cfg |= ATC_FIFOCFG_ENOUGHSPACE; + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, + ATC_FIFOCFG_ENOUGHSPACE); break; case AT91_DMA_CFG_FIFOCFG_HALF: default: - atslave->cfg |= ATC_FIFOCFG_HALFFIFO; + atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO); } atslave->dma_dev = &dmac_pdev->dev; -- GitLab From 1c1114d850b6993184c117edad7c91f7f09cb9d5 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:03 +0300 Subject: [PATCH 189/694] dmaengine: at_hdmac: Rename "dma_common" to "dma_device" "dma_common" name was misleading and did not suggest that's actually a struct dma_device underneath. Rename it so that readers can follow the code easier. One may see some checks and a warning when running checkpatch. Those have nothing to do with the rename and will be addressed in a further patch. 
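As background for the FIELD_PREP()/FIELD_GET() conversion in the hunks above: both helpers shift a value into, or out of, the bit position implied by a mask, which is what lets the open-coded ATC_SRC_WIDTH(x)-style shift macros be dropped. A minimal userspace approximation of the idea, using made-up DEMO_* masks rather than the real ATC_* register layout:

  #include <stdint.h>
  #include <stdio.h>

  /* Rough stand-ins for the kernel helpers; the real macros additionally do
   * compile-time checking that a constant value fits the field. */
  #define FIELD_PREP(mask, val) (((uint32_t)(val) << __builtin_ctz(mask)) & (mask))
  #define FIELD_GET(mask, reg)  (((uint32_t)(reg) & (mask)) >> __builtin_ctz(mask))

  /* Hypothetical field layout, for illustration only. */
  #define DEMO_SRC_WIDTH 0x03000000u  /* bits 25:24 */
  #define DEMO_BTSIZE    0x0000ffffu  /* bits 15:0  */

  int main(void)
  {
          uint32_t ctrla = FIELD_PREP(DEMO_SRC_WIDTH, 2) |
                           FIELD_PREP(DEMO_BTSIZE, 512);

          printf("ctrla = 0x%08x, width = %u, btsize = %u\n",
                 (unsigned int)ctrla,
                 (unsigned int)FIELD_GET(DEMO_SRC_WIDTH, ctrla),
                 (unsigned int)FIELD_GET(DEMO_BTSIZE, ctrla));
          return 0;
  }

With this style the mask alone carries the field position, so a register-layout change only touches the mask definition instead of a matching pair of shift and mask macros.
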
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-30-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 92 +++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index edec6a8c730c0..b2619600f68a6 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -338,7 +338,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) /** * struct at_dma - internal representation of an Atmel HDMA Controller - * @chan_common: common dmaengine dma_device object members + * @dma_device: dmaengine dma_device object members * @atdma_devtype: identifier of DMA controller compatibility * @ch_regs: memory mapped register base * @clk: dma controller clock @@ -348,7 +348,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) * @chan: channels table to store at_dma_chan structures */ struct at_dma { - struct dma_device dma_common; + struct dma_device dma_device; void __iomem *regs; struct clk *clk; u32 save_imr; @@ -368,7 +368,7 @@ struct at_dma { static inline struct at_dma *to_at_dma(struct dma_device *ddev) { - return container_of(ddev, struct at_dma, dma_common); + return container_of(ddev, struct at_dma, dma_device); } @@ -1069,11 +1069,11 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) if (!pending) break; - dev_vdbg(atdma->dma_common.dev, + dev_vdbg(atdma->dma_device.dev, "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n", status, imr, pending); - for (i = 0; i < atdma->dma_common.chancnt; i++) { + for (i = 0; i < atdma->dma_device.chancnt; i++) { atchan = &atdma->chan[i]; if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) { if (pending & AT_DMA_ERR(i)) { @@ -2000,7 +2000,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) * We need controller-specific data to set up slave * transfers. 
*/ - BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev); + BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_device.dev); /* if cfg configuration specified take it instead of default */ if (atslave->cfg) @@ -2011,7 +2011,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < init_nr_desc_per_channel; i++) { desc = atc_alloc_descriptor(chan, GFP_KERNEL); if (!desc) { - dev_err(atdma->dma_common.dev, + dev_err(atdma->dma_device.dev, "Only %d initial descriptors\n", i); break; } @@ -2255,7 +2255,7 @@ static int __init at_dma_probe(struct platform_device *pdev) return irq; /* discover transaction capabilities */ - atdma->dma_common.cap_mask = plat_dat->cap_mask; + atdma->dma_device.cap_mask = plat_dat->cap_mask; atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1; atdma->clk = devm_clk_get(&pdev->dev, "dma_clk"); @@ -2299,16 +2299,16 @@ static int __init at_dma_probe(struct platform_device *pdev) cpu_relax(); /* initialize channels related values */ - INIT_LIST_HEAD(&atdma->dma_common.channels); + INIT_LIST_HEAD(&atdma->dma_device.channels); for (i = 0; i < plat_dat->nr_channels; i++) { struct at_dma_chan *atchan = &atdma->chan[i]; atchan->mem_if = AT_DMA_MEM_IF; atchan->per_if = AT_DMA_PER_IF; - atchan->chan_common.device = &atdma->dma_common; + atchan->chan_common.device = &atdma->dma_device; dma_cookie_init(&atchan->chan_common); list_add_tail(&atchan->chan_common.device_node, - &atdma->dma_common.channels); + &atdma->dma_device.channels); atchan->ch_regs = atdma->regs + ch_regs(i); spin_lock_init(&atchan->lock); @@ -2323,49 +2323,49 @@ static int __init at_dma_probe(struct platform_device *pdev) } /* set base routines */ - atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources; - atdma->dma_common.device_free_chan_resources = atc_free_chan_resources; - atdma->dma_common.device_tx_status = atc_tx_status; - atdma->dma_common.device_issue_pending = atc_issue_pending; - atdma->dma_common.dev = &pdev->dev; + atdma->dma_device.device_alloc_chan_resources = atc_alloc_chan_resources; + atdma->dma_device.device_free_chan_resources = atc_free_chan_resources; + atdma->dma_device.device_tx_status = atc_tx_status; + atdma->dma_device.device_issue_pending = atc_issue_pending; + atdma->dma_device.dev = &pdev->dev; /* set prep routines based on capability */ - if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask)) - atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved; + if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_device.cap_mask)) + atdma->dma_device.device_prep_interleaved_dma = atc_prep_dma_interleaved; - if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) - atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMCPY, atdma->dma_device.cap_mask)) + atdma->dma_device.device_prep_dma_memcpy = atc_prep_dma_memcpy; - if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) { - atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset; - atdma->dma_common.device_prep_dma_memset_sg = atc_prep_dma_memset_sg; - atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES; + if (dma_has_cap(DMA_MEMSET, atdma->dma_device.cap_mask)) { + atdma->dma_device.device_prep_dma_memset = atc_prep_dma_memset; + atdma->dma_device.device_prep_dma_memset_sg = atc_prep_dma_memset_sg; + atdma->dma_device.fill_align = DMAENGINE_ALIGN_4_BYTES; } - if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { - atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; + if (dma_has_cap(DMA_SLAVE, 
atdma->dma_device.cap_mask)) { + atdma->dma_device.device_prep_slave_sg = atc_prep_slave_sg; /* controller can do slave DMA: can trigger cyclic transfers */ - dma_cap_set(DMA_CYCLIC, atdma->dma_common.cap_mask); - atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic; - atdma->dma_common.device_config = atc_config; - atdma->dma_common.device_pause = atc_pause; - atdma->dma_common.device_resume = atc_resume; - atdma->dma_common.device_terminate_all = atc_terminate_all; - atdma->dma_common.src_addr_widths = ATC_DMA_BUSWIDTHS; - atdma->dma_common.dst_addr_widths = ATC_DMA_BUSWIDTHS; - atdma->dma_common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - atdma->dma_common.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + dma_cap_set(DMA_CYCLIC, atdma->dma_device.cap_mask); + atdma->dma_device.device_prep_dma_cyclic = atc_prep_dma_cyclic; + atdma->dma_device.device_config = atc_config; + atdma->dma_device.device_pause = atc_pause; + atdma->dma_device.device_resume = atc_resume; + atdma->dma_device.device_terminate_all = atc_terminate_all; + atdma->dma_device.src_addr_widths = ATC_DMA_BUSWIDTHS; + atdma->dma_device.dst_addr_widths = ATC_DMA_BUSWIDTHS; + atdma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + atdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; } dma_writel(atdma, EN, AT_DMA_ENABLE); dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n", - dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", - dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "", - dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", + dma_has_cap(DMA_MEMCPY, atdma->dma_device.cap_mask) ? "cpy " : "", + dma_has_cap(DMA_MEMSET, atdma->dma_device.cap_mask) ? "set " : "", + dma_has_cap(DMA_SLAVE, atdma->dma_device.cap_mask) ? 
"slave " : "", plat_dat->nr_channels); - err = dma_async_device_register(&atdma->dma_common); + err = dma_async_device_register(&atdma->dma_device); if (err) { dev_err(&pdev->dev, "Unable to register: %d.\n", err); goto err_dma_async_device_register; @@ -2388,7 +2388,7 @@ static int __init at_dma_probe(struct platform_device *pdev) return 0; err_of_dma_controller_register: - dma_async_device_unregister(&atdma->dma_common); + dma_async_device_unregister(&atdma->dma_device); err_dma_async_device_register: dma_pool_destroy(atdma->memset_pool); err_memset_pool_create: @@ -2408,13 +2408,13 @@ static int at_dma_remove(struct platform_device *pdev) at_dma_off(atdma); if (pdev->dev.of_node) of_dma_controller_free(pdev->dev.of_node); - dma_async_device_unregister(&atdma->dma_common); + dma_async_device_unregister(&atdma->dma_device); dma_pool_destroy(atdma->memset_pool); dma_pool_destroy(atdma->dma_desc_pool); free_irq(platform_get_irq(pdev, 0), atdma); - list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { struct at_dma_chan *atchan = to_at_dma_chan(chan); @@ -2443,7 +2443,7 @@ static int at_dma_prepare(struct device *dev) struct at_dma *atdma = dev_get_drvdata(dev); struct dma_chan *chan, *_chan; - list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { struct at_dma_chan *atchan = to_at_dma_chan(chan); /* wait for transaction completion (except in cyclic case) */ @@ -2478,7 +2478,7 @@ static int at_dma_suspend_noirq(struct device *dev) struct dma_chan *chan, *_chan; /* preserve data */ - list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { struct at_dma_chan *atchan = to_at_dma_chan(chan); @@ -2528,7 +2528,7 @@ static int at_dma_resume_noirq(struct device *dev) /* restore saved data */ dma_writel(atdma, EBCIER, atdma->save_imr); - list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels, + list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { struct at_dma_chan *atchan = to_at_dma_chan(chan); -- GitLab From 304184f79c7eb50b32915b29f7cacd58455048d2 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:04 +0300 Subject: [PATCH 190/694] dmaengine: at_hdmac: Rename "chan_common" to "dma_chan" "chan_common" was misleading and did not suggest that's actually a struct dma_chan underneath. Rename it so that readers can follow the code easier. One may see some checks when running checkpatch. Those have nothing to do with the rename and will be addressed in a further patch. 
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-31-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 62 +++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index b2619600f68a6..e39b9b47234a3 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -252,7 +252,7 @@ enum atc_status { /** * struct at_dma_chan - internal representation of an Atmel HDMAC channel - * @chan_common: common dmaengine channel object members + * @dma_chan: common dmaengine channel object members * @device: parent device * @ch_regs: memory mapped register base * @mask: channel index in a mask @@ -272,7 +272,7 @@ enum atc_status { * @free_list: list of descriptors usable by the channel */ struct at_dma_chan { - struct dma_chan chan_common; + struct dma_chan dma_chan; struct at_dma *device; void __iomem *ch_regs; u8 mask; @@ -300,7 +300,7 @@ struct at_dma_chan { static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) { - return container_of(dchan, struct at_dma_chan, chan_common); + return container_of(dchan, struct at_dma_chan, dma_chan); } /* @@ -382,15 +382,15 @@ static struct device *chan2dev(struct dma_chan *chan) #if defined(VERBOSE_DEBUG) static void vdbg_dump_regs(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); - dev_err(chan2dev(&atchan->chan_common), + dev_err(chan2dev(&atchan->dma_chan), " channel %d : imr = 0x%x, chsr = 0x%x\n", - atchan->chan_common.chan_id, + atchan->dma_chan.chan_id, dma_readl(atdma, EBCIMR), dma_readl(atdma, CHSR)); - dev_err(chan2dev(&atchan->chan_common), + dev_err(chan2dev(&atchan->dma_chan), " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", channel_readl(atchan, SADDR), channel_readl(atchan, DADDR), @@ -405,7 +405,7 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {} static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) { - dev_crit(chan2dev(&atchan->chan_common), + dev_crit(chan2dev(&atchan->dma_chan), "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", &lli->saddr, &lli->daddr, lli->ctrla, lli->ctrlb, &lli->dscr); @@ -442,7 +442,7 @@ static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) */ static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); return !!(dma_readl(atdma, CHSR) & atchan->mask); } @@ -603,16 +603,16 @@ static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) ret = desc; break; } - dev_dbg(chan2dev(&atchan->chan_common), + dev_dbg(chan2dev(&atchan->dma_chan), "desc %p not ACKed\n", desc); } spin_unlock_irqrestore(&atchan->lock, flags); - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "scanned %u descriptors on freelist\n", i); /* no more descriptor available in initial pool: create one more */ if (!ret) - ret = atc_alloc_descriptor(&atchan->chan_common, GFP_NOWAIT); + ret = atc_alloc_descriptor(&atchan->dma_chan, GFP_NOWAIT); return ret; } @@ -630,11 +630,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) spin_lock_irqsave(&atchan->lock, flags); list_for_each_entry(child, &desc->tx_list, desc_node) - 
dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "moving child desc %p to freelist\n", child); list_splice_init(&desc->tx_list, &atchan->free_list); - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &atchan->free_list); spin_unlock_irqrestore(&atchan->lock, flags); @@ -673,13 +673,13 @@ static void atc_desc_chain(struct at_desc **first, struct at_desc **prev, */ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) { - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); /* ASSERT: channel is idle */ if (atc_chan_is_enabled(atchan)) { - dev_err(chan2dev(&atchan->chan_common), + dev_err(chan2dev(&atchan->dma_chan), "BUG: Attempted to start non-idle channel\n"); - dev_err(chan2dev(&atchan->chan_common), + dev_err(chan2dev(&atchan->dma_chan), " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", channel_readl(atchan, SADDR), channel_readl(atchan, DADDR), @@ -905,10 +905,10 @@ static void atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) { struct dma_async_tx_descriptor *txd = &desc->txd; - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); unsigned long flags; - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "descriptor %u complete\n", txd->cookie); spin_lock_irqsave(&atchan->lock, flags); @@ -951,7 +951,7 @@ static void atc_advance_work(struct at_dma_chan *atchan) struct at_desc *desc; unsigned long flags; - dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); + dev_vdbg(chan2dev(&atchan->dma_chan), "advance_work\n"); spin_lock_irqsave(&atchan->lock, flags); if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) @@ -1010,9 +1010,9 @@ static void atc_handle_error(struct at_dma_chan *atchan) * controller flagged an error instead of scribbling over * random memory locations. 
*/ - dev_crit(chan2dev(&atchan->chan_common), + dev_crit(chan2dev(&atchan->dma_chan), "Bad descriptor submitted for DMA!\n"); - dev_crit(chan2dev(&atchan->chan_common), + dev_crit(chan2dev(&atchan->dma_chan), " cookie: %d\n", bad_desc->txd.cookie); atc_dump_lli(atchan, &bad_desc->lli); list_for_each_entry(child, &bad_desc->tx_list, desc_node) @@ -1031,7 +1031,7 @@ static void atc_handle_cyclic(struct at_dma_chan *atchan) struct at_desc *first = atc_first_active(atchan); struct dma_async_tx_descriptor *txd = &first->txd; - dev_vdbg(chan2dev(&atchan->chan_common), + dev_vdbg(chan2dev(&atchan->dma_chan), "new cyclic period llp 0x%08x\n", channel_readl(atchan, DSCR)); @@ -1825,7 +1825,7 @@ static int atc_pause(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->chan_common.chan_id; + int chan_id = atchan->dma_chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1844,7 +1844,7 @@ static int atc_resume(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->chan_common.chan_id; + int chan_id = atchan->dma_chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1866,7 +1866,7 @@ static int atc_terminate_all(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->chan_common.chan_id; + int chan_id = atchan->dma_chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -2305,9 +2305,9 @@ static int __init at_dma_probe(struct platform_device *pdev) atchan->mem_if = AT_DMA_MEM_IF; atchan->per_if = AT_DMA_PER_IF; - atchan->chan_common.device = &atdma->dma_device; - dma_cookie_init(&atchan->chan_common); - list_add_tail(&atchan->chan_common.device_node, + atchan->dma_chan.device = &atdma->dma_device; + dma_cookie_init(&atchan->dma_chan); + list_add_tail(&atchan->dma_chan.device_node, &atdma->dma_device.channels); atchan->ch_regs = atdma->regs + ch_regs(i); @@ -2455,7 +2455,7 @@ static int at_dma_prepare(struct device *dev) static void atc_suspend_cyclic(struct at_dma_chan *atchan) { - struct dma_chan *chan = &atchan->chan_common; + struct dma_chan *chan = &atchan->dma_chan; /* Channel should be paused by user * do it anyway even if it is not done already */ @@ -2496,7 +2496,7 @@ static int at_dma_suspend_noirq(struct device *dev) static void atc_resume_cyclic(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->chan_common.device); + struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); /* restore channel status for cyclic descriptors list: * next descriptor in the cyclic list at the time of suspend */ -- GitLab From 993b397752f3babb698932f20c1c363c4eca4efc Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:05 +0300 Subject: [PATCH 191/694] dmaengine: at_hdmac: Remove unused member of at_dma_chan The pointer to at_dma engine was never used, remove it. 
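The back-pointer is indeed redundant: wherever the driver needs the parent controller it recovers it from the embedded struct dma_chan through the container_of()-based helpers to_at_dma_chan() and to_at_dma(), as the hunks above show. A self-contained sketch of that pattern, with simplified stand-in structs rather than the real dmaengine definitions:

  #include <stddef.h>
  #include <stdio.h>

  /* Simplified container_of(); the kernel version adds type checking. */
  #define container_of(ptr, type, member) \
          ((type *)((char *)(ptr) - offsetof(type, member)))

  struct dma_chan { int chan_id; };          /* stand-in, not the real struct */

  struct at_dma_chan {
          struct dma_chan dma_chan;          /* embedded, as in the driver */
          void *ch_regs;
  };

  static struct at_dma_chan *to_at_dma_chan(struct dma_chan *chan)
  {
          return container_of(chan, struct at_dma_chan, dma_chan);
  }

  int main(void)
  {
          struct at_dma_chan atchan = { .dma_chan = { .chan_id = 3 } };
          struct dma_chan *chan = &atchan.dma_chan;  /* what callers hand back */

          /* Recovers the wrapper without storing a separate back-pointer. */
          printf("match: %d\n", to_at_dma_chan(chan) == &atchan);
          return 0;
  }
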
Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-32-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index e39b9b47234a3..143d75c18d1e6 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -253,7 +253,6 @@ enum atc_status { /** * struct at_dma_chan - internal representation of an Atmel HDMAC channel * @dma_chan: common dmaengine channel object members - * @device: parent device * @ch_regs: memory mapped register base * @mask: channel index in a mask * @per_if: peripheral interface @@ -273,7 +272,6 @@ enum atc_status { */ struct at_dma_chan { struct dma_chan dma_chan; - struct at_dma *device; void __iomem *ch_regs; u8 mask; u8 per_if; -- GitLab From ac803b56860f6506c55a3c9330007837e3f4edda Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:03:06 +0300 Subject: [PATCH 192/694] dmaengine: at_hdmac: Convert driver to use virt-dma Convert the driver to use the core virt-dma. The driver will be easier to maintain as it uses the list handling and the tasklet from virt-dma. With the conversion replace the election of a new transfer in the tasklet with the election of the new transfer in the interrupt handler. With this we have a shorter idle window as we remove the scheduling latency of the tasklet. I chose to do this while doing the conversion to virt-dma, because if I made a prerequisite patch with the new transfer election in the irq handler, I would have to duplicate some virt-dma code in the at_hdmac driver that would end up being removed at the virt-dma conversion anyway. So do this in a single step. Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-33-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 1 + drivers/dma/at_hdmac.c | 1108 +++++++++++++++------------------------- 2 files changed, 403 insertions(+), 706 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index ea81d825575fb..b9d54f20812f7 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -97,6 +97,7 @@ config AT_HDMAC tristate "Atmel AHB DMA support" depends on ARCH_AT91 select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help Support the Atmel AHB DMA controller. diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 143d75c18d1e6..8858470246e15 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -3,6 +3,7 @@ * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems) * * Copyright (C) 2008 Atmel Corporation + * Copyright (C) 2022 Microchip Technology, Inc. and its subsidiaries * * This supports the Atmel AHB DMA Controller found in several Atmel SoCs. * The only Atmel DMA Controller that is not covered by this driver is the one @@ -25,6 +26,7 @@ #include #include "dmaengine.h" +#include "virt-dma.h" /* * Glossary @@ -200,25 +202,31 @@ struct at_lli { u32 dscr; /* chain to next lli */ }; +/** + * struct atdma_sg - atdma scatter gather entry + * @len: length of the current Linked List Item. + * @lli: linked list item that is passed to the DMA controller + * @lli_phys: physical address of the LLI. 
+ */ +struct atdma_sg { + unsigned int len; + struct at_lli *lli; + dma_addr_t lli_phys; +}; + /** * struct at_desc - software descriptor - * @at_lli: hardware lli structure - * @txd: support for the async_tx api - * @desc_node: node on the channed descriptors list - * @len: descriptor byte count + * @vd: pointer to the virtual dma descriptor. + * @atchan: pointer to the atmel dma channel. * @total_len: total transaction byte count + * @sg_len: number of sg entries. + * @sg: array of sgs. */ struct at_desc { - /* FIRST values the hardware uses */ - struct at_lli lli; - - /* THEN values for driver housekeeping */ - struct list_head tx_list; - struct dma_async_tx_descriptor txd; - struct list_head desc_node; - size_t len; + struct virt_dma_desc vd; + struct at_dma_chan *atchan; size_t total_len; - + unsigned int sglen; /* Interleaved data */ size_t boundary; size_t dst_hole; @@ -228,15 +236,9 @@ struct at_desc { bool memset_buffer; dma_addr_t memset_paddr; int *memset_vaddr; + struct atdma_sg sg[]; }; -static inline struct at_desc * -txd_to_at_desc(struct dma_async_tx_descriptor *txd) -{ - return container_of(txd, struct at_desc, txd); -} - - /*-- Channels --------------------------------------------------------*/ /** @@ -245,49 +247,40 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd) * Manipulated with atomic operations. */ enum atc_status { - ATC_IS_ERROR = 0, ATC_IS_PAUSED = 1, ATC_IS_CYCLIC = 24, }; /** * struct at_dma_chan - internal representation of an Atmel HDMAC channel - * @dma_chan: common dmaengine channel object members + * @vc: virtual dma channel entry. + * @atdma: pointer to the driver data. * @ch_regs: memory mapped register base * @mask: channel index in a mask * @per_if: peripheral interface * @mem_if: memory interface * @status: transmit status information from irq/prep* functions * to tasklet (use atomic operations) - * @tasklet: bottom half to finish transaction work * @save_cfg: configuration register that is saved on suspend/resume cycle * @save_dscr: for cyclic operations, preserve next descriptor address in * the cyclic list on suspend/resume cycle * @dma_sconfig: configuration for slave transfers, passed via * .device_config - * @lock: serializes enqueue/dequeue operations to descriptors lists - * @active_list: list of descriptors dmaengine is being running on - * @queue: list of descriptors ready to be submitted to engine - * @free_list: list of descriptors usable by the channel + * @desc: pointer to the atmel dma descriptor. */ struct at_dma_chan { - struct dma_chan dma_chan; + struct virt_dma_chan vc; + struct at_dma *atdma; void __iomem *ch_regs; u8 mask; u8 per_if; u8 mem_if; unsigned long status; - struct tasklet_struct tasklet; u32 save_cfg; u32 save_dscr; - struct dma_slave_config dma_sconfig; - - spinlock_t lock; - - /* these other elements are all protected by lock */ - struct list_head active_list; - struct list_head queue; - struct list_head free_list; + struct dma_slave_config dma_sconfig; + bool cyclic; + struct at_desc *desc; }; #define channel_readl(atchan, name) \ @@ -296,11 +289,6 @@ struct at_dma_chan { #define channel_writel(atchan, name, val) \ __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET) -static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan) -{ - return container_of(dchan, struct at_dma_chan, dma_chan); -} - /* * Fix sconfig's burst size according to at_hdmac. We need to convert them as: * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7. 
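The 1 -> 0, 4 -> 1, ..., 256 -> 7 table in the comment above amounts to log2(maxburst) - 1 for bursts larger than one word, with a single-word burst encoded as 0. A hypothetical userspace sketch of such a conversion (demo_convert_burst() and its find-last-set idiom are illustrative, not the driver's actual helper):

  #include <stdio.h>

  /* Illustrative only: encodes maxburst the way the comment above describes. */
  static unsigned int demo_convert_burst(unsigned int maxburst)
  {
          if (maxburst <= 1)
                  return 0;
          /* 32 - __builtin_clz(x) mirrors the kernel's fls(x) for x != 0 */
          return (32 - __builtin_clz(maxburst)) - 2;
  }

  int main(void)
  {
          static const unsigned int bursts[] = { 1, 4, 8, 16, 32, 64, 128, 256 };

          for (unsigned int i = 0; i < sizeof(bursts) / sizeof(bursts[0]); i++)
                  printf("%3u -> %u\n", bursts[i], demo_convert_burst(bursts[i]));
          return 0;
  }
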
@@ -342,7 +330,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) * @clk: dma controller clock * @save_imr: interrupt mask register that is saved on suspend/resume cycle * @all_chan_mask: all channels availlable in a mask - * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @lli_pool: hw lli table * @chan: channels table to store at_dma_chan structures */ struct at_dma { @@ -353,7 +341,7 @@ struct at_dma { u8 all_chan_mask; - struct dma_pool *dma_desc_pool; + struct dma_pool *lli_pool; struct dma_pool *memset_pool; /* AT THE END channels table */ struct at_dma_chan chan[]; @@ -364,6 +352,16 @@ struct at_dma { #define dma_writel(atdma, name, val) \ __raw_writel((val), (atdma)->regs + AT_DMA_##name) +static inline struct at_desc *to_atdma_desc(struct dma_async_tx_descriptor *t) +{ + return container_of(t, struct at_desc, vd.tx); +} + +static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct at_dma_chan, vc.chan); +} + static inline struct at_dma *to_at_dma(struct dma_device *ddev) { return container_of(ddev, struct at_dma, dma_device); @@ -380,15 +378,15 @@ static struct device *chan2dev(struct dma_chan *chan) #if defined(VERBOSE_DEBUG) static void vdbg_dump_regs(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); - dev_err(chan2dev(&atchan->dma_chan), + dev_err(chan2dev(&atchan->vc.chan), " channel %d : imr = 0x%x, chsr = 0x%x\n", - atchan->dma_chan.chan_id, + atchan->vc.chan.chan_id, dma_readl(atdma, EBCIMR), dma_readl(atdma, CHSR)); - dev_err(chan2dev(&atchan->dma_chan), + dev_err(chan2dev(&atchan->vc.chan), " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n", channel_readl(atchan, SADDR), channel_readl(atchan, DADDR), @@ -403,7 +401,7 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {} static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) { - dev_crit(chan2dev(&atchan->dma_chan), + dev_crit(chan2dev(&atchan->vc.chan), "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n", &lli->saddr, &lli->daddr, lli->ctrla, lli->ctrlb, &lli->dscr); @@ -440,7 +438,7 @@ static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id) */ static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); return !!(dma_readl(atdma, CHSR) & atchan->mask); } @@ -464,18 +462,19 @@ static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan) } /** - * set_desc_eol - set end-of-link to descriptor so it will end transfer + * set_lli_eol - set end-of-link to descriptor so it will end transfer * @desc: descriptor, signle or at the end of a chain, to end chain on + * @i: index of the atmel scatter gather entry that is at the end of the chain. 
*/ -static void set_desc_eol(struct at_desc *desc) +static void set_lli_eol(struct at_desc *desc, unsigned int i) { - u32 ctrlb = desc->lli.ctrlb; + u32 ctrlb = desc->sg[i].lli->ctrlb; ctrlb &= ~ATC_IEN; ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; - desc->lli.ctrlb = ctrlb; - desc->lli.dscr = 0; + desc->sg[i].lli->ctrlb = ctrlb; + desc->sg[i].lli->dscr = 0; } #define ATC_DEFAULT_CFG FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO) @@ -518,13 +517,6 @@ struct at_dma_slave { u32 cfg; }; -/* prototypes */ -static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx); -static void atc_issue_pending(struct dma_chan *chan); - - -/*----------------------------------------------------------------------*/ - static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst, size_t len) { @@ -540,196 +532,72 @@ static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst, return width; } -static struct at_desc *atc_first_active(struct at_dma_chan *atchan) +static void atdma_lli_chain(struct at_desc *desc, unsigned int i) { - return list_first_entry(&atchan->active_list, - struct at_desc, desc_node); -} + struct atdma_sg *atdma_sg = &desc->sg[i]; -static struct at_desc *atc_first_queued(struct at_dma_chan *atchan) -{ - return list_first_entry(&atchan->queue, - struct at_desc, desc_node); -} - -/** - * atc_alloc_descriptor - allocate and return an initialized descriptor - * @chan: the channel to allocate descriptors for - * @gfp_flags: GFP allocation flags - * - * Note: The ack-bit is positioned in the descriptor flag at creation time - * to make initial allocation more convenient. This bit will be cleared - * and control will be given to client at usage time (during - * preparation functions). - */ -static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, - gfp_t gfp_flags) -{ - struct at_desc *desc = NULL; - struct at_dma *atdma = to_at_dma(chan->device); - dma_addr_t phys; - - desc = dma_pool_zalloc(atdma->dma_desc_pool, gfp_flags, &phys); - if (desc) { - INIT_LIST_HEAD(&desc->tx_list); - dma_async_tx_descriptor_init(&desc->txd, chan); - /* txd.flags will be overwritten in prep functions */ - desc->txd.flags = DMA_CTRL_ACK; - desc->txd.tx_submit = atc_tx_submit; - desc->txd.phys = phys; - } - - return desc; -} - -/** - * atc_desc_get - get an unused descriptor from free_list - * @atchan: channel we want a new descriptor for - */ -static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) -{ - struct at_desc *desc, *_desc; - struct at_desc *ret = NULL; - unsigned long flags; - unsigned int i = 0; - - spin_lock_irqsave(&atchan->lock, flags); - list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { - i++; - if (async_tx_test_ack(&desc->txd)) { - list_del(&desc->desc_node); - ret = desc; - break; - } - dev_dbg(chan2dev(&atchan->dma_chan), - "desc %p not ACKed\n", desc); - } - spin_unlock_irqrestore(&atchan->lock, flags); - dev_vdbg(chan2dev(&atchan->dma_chan), - "scanned %u descriptors on freelist\n", i); - - /* no more descriptor available in initial pool: create one more */ - if (!ret) - ret = atc_alloc_descriptor(&atchan->dma_chan, GFP_NOWAIT); - - return ret; -} - -/** - * atc_desc_put - move a descriptor, including any children, to the free list - * @atchan: channel we work on - * @desc: descriptor, at the head of a chain, to move to free list - */ -static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) -{ - if (desc) { - struct at_desc *child; - unsigned long flags; - - spin_lock_irqsave(&atchan->lock, flags); - 
list_for_each_entry(child, &desc->tx_list, desc_node) - dev_vdbg(chan2dev(&atchan->dma_chan), - "moving child desc %p to freelist\n", - child); - list_splice_init(&desc->tx_list, &atchan->free_list); - dev_vdbg(chan2dev(&atchan->dma_chan), - "moving desc %p to freelist\n", desc); - list_add(&desc->desc_node, &atchan->free_list); - spin_unlock_irqrestore(&atchan->lock, flags); - } -} - -/** - * atc_desc_chain - build chain adding a descriptor - * @first: address of first descriptor of the chain - * @prev: address of previous descriptor of the chain - * @desc: descriptor to queue - * - * Called from prep_* functions - */ -static void atc_desc_chain(struct at_desc **first, struct at_desc **prev, - struct at_desc *desc) -{ - if (!(*first)) { - *first = desc; - } else { - /* inform the HW lli about chaining */ - (*prev)->lli.dscr = desc->txd.phys; - /* insert the link descriptor to the LD ring */ - list_add_tail(&desc->desc_node, - &(*first)->tx_list); - } - *prev = desc; + if (i) + desc->sg[i - 1].lli->dscr = atdma_sg->lli_phys; } /** * atc_dostart - starts the DMA engine for real * @atchan: the channel we want to start - * @first: first descriptor in the list we want to begin with - * - * Called with atchan->lock held and bh disabled */ -static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) +static void atc_dostart(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); + struct virt_dma_desc *vd = vchan_next_desc(&atchan->vc); + struct at_desc *desc; - /* ASSERT: channel is idle */ - if (atc_chan_is_enabled(atchan)) { - dev_err(chan2dev(&atchan->dma_chan), - "BUG: Attempted to start non-idle channel\n"); - dev_err(chan2dev(&atchan->dma_chan), - " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", - channel_readl(atchan, SADDR), - channel_readl(atchan, DADDR), - channel_readl(atchan, CTRLA), - channel_readl(atchan, CTRLB), - channel_readl(atchan, DSCR)); - - /* The tasklet will hopefully advance the queue... */ + if (!vd) { + atchan->desc = NULL; return; } vdbg_dump_regs(atchan); + list_del(&vd->node); + atchan->desc = desc = to_atdma_desc(&vd->tx); + channel_writel(atchan, SADDR, 0); channel_writel(atchan, DADDR, 0); channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); - channel_writel(atchan, DSCR, first->txd.phys); - channel_writel(atchan, SPIP, FIELD_PREP(ATC_SPIP_HOLE, - first->src_hole) | - FIELD_PREP(ATC_SPIP_BOUNDARY, first->boundary)); - channel_writel(atchan, DPIP, FIELD_PREP(ATC_DPIP_HOLE, - first->dst_hole) | - FIELD_PREP(ATC_DPIP_BOUNDARY, first->boundary)); + channel_writel(atchan, DSCR, desc->sg[0].lli_phys); + channel_writel(atchan, SPIP, + FIELD_PREP(ATC_SPIP_HOLE, desc->src_hole) | + FIELD_PREP(ATC_SPIP_BOUNDARY, desc->boundary)); + channel_writel(atchan, DPIP, + FIELD_PREP(ATC_DPIP_HOLE, desc->dst_hole) | + FIELD_PREP(ATC_DPIP_BOUNDARY, desc->boundary)); + /* Don't allow CPU to reorder channel enable. 
*/ wmb(); - dma_writel(atdma, CHER, atchan->mask); + dma_writel(atchan->atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); } -/* - * atc_get_desc_by_cookie - get the descriptor of a cookie - * @atchan: the DMA channel - * @cookie: the cookie to get the descriptor for - */ -static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, - dma_cookie_t cookie) +static void atdma_desc_free(struct virt_dma_desc *vd) { - struct at_desc *desc, *_desc; + struct at_dma *atdma = to_at_dma(vd->tx.chan->device); + struct at_desc *desc = to_atdma_desc(&vd->tx); + unsigned int i; - list_for_each_entry_safe(desc, _desc, &atchan->queue, desc_node) { - if (desc->txd.cookie == cookie) - return desc; + for (i = 0; i < desc->sglen; i++) { + if (desc->sg[i].lli) + dma_pool_free(atdma->lli_pool, desc->sg[i].lli, + desc->sg[i].lli_phys); } - list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { - if (desc->txd.cookie == cookie) - return desc; + /* If the transfer was a memset, free our temporary buffer */ + if (desc->memset_buffer) { + dma_pool_free(atdma->memset_pool, desc->memset_vaddr, + desc->memset_paddr); + desc->memset_buffer = false; } - return NULL; + kfree(desc); } /** @@ -756,20 +624,19 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) /** * atc_get_llis_residue - Get residue for a hardware linked list transfer * - * Calculate the residue by removing the length of the child descriptors already - * transferred from the total length. To get the current child descriptor we can - * use the value of the channel's DSCR register and compare it against the value - * of the hardware linked list structure of each child descriptor. + * Calculate the residue by removing the length of the Linked List Item (LLI) + * already transferred from the total length. To get the current LLI we can use + * the value of the channel's DSCR register and compare it against the DSCR + * value of each LLI. * * The CTRLA register provides us with the amount of data already read from the - * source for the current child descriptor. So we can compute a more accurate - * residue by also removing the number of bytes corresponding to this amount of - * data. + * source for the LLI. So we can compute a more accurate residue by also + * removing the number of bytes corresponding to this amount of data. * * However, the DSCR and CTRLA registers cannot be read both atomically. Hence a - * race condition may occur: the first read register may refer to one child - * descriptor whereas the second read may refer to a later child descriptor in - * the list because of the DMA transfer progression inbetween the two reads. + * race condition may occur: the first read register may refer to one LLI + * whereas the second read may refer to a later LLI in the list because of the + * DMA transfer progression inbetween the two reads. * * One solution could have been to pause the DMA transfer, read the DSCR and * CTRLA then resume the DMA transfer. Nonetheless, this approach presents some @@ -786,12 +653,11 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) * Then we'd rather use another solution: the DSCR is read a first time, the * CTRLA is read in turn, next the DSCR is read a second time. If the two * consecutive read values of the DSCR are the same then we assume both refers - * to the very same child descriptor as well as the CTRLA value read inbetween - * does. For cyclic tranfers, the assumption is that a full loop is "not so - * fast". 
If the two DSCR values are different, we read again the CTRLA then the - * DSCR till two consecutive read values from DSCR are equal or till the - * maximum trials is reach. This algorithm is very unlikely not to find a stable - * value for DSCR. + * to the very same LLI as well as the CTRLA value read inbetween does. For + * cyclic tranfers, the assumption is that a full loop is "not so fast". If the + * two DSCR values are different, we read again the CTRLA then the DSCR till two + * consecutive read values from DSCR are equal or till the maximum trials is + * reach. This algorithm is very unlikely not to find a stable value for DSCR. * @atchan: pointer to an atmel hdmac channel. * @desc: pointer to the descriptor for which the residue is calculated. * @residue: residue to be set to dma_tx_state. @@ -800,7 +666,6 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) static int atc_get_llis_residue(struct at_dma_chan *atchan, struct at_desc *desc, u32 *residue) { - struct at_desc *child; u32 len, ctrla, dscr; unsigned int i; @@ -836,24 +701,25 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan, return -ETIMEDOUT; /* For the first descriptor we can be more accurate. */ - if (desc->lli.dscr == dscr) { + if (desc->sg[0].lli->dscr == dscr) { *residue = atc_calc_bytes_left(len, ctrla); return 0; } + len -= desc->sg[0].len; - len -= desc->len; - list_for_each_entry(child, &desc->tx_list, desc_node) { - if (child->lli.dscr == dscr) + for (i = 1; i < desc->sglen; i++) { + if (desc->sg[i].lli && desc->sg[i].lli->dscr == dscr) break; - len -= child->len; + len -= desc->sg[i].len; } /* - * For the current descriptor in the chain we can calculate the - * remaining bytes using the channel's register. + * For the current LLI in the chain we can calculate the remaining bytes + * using the channel's CTRLA register. */ *residue = atc_calc_bytes_left(len, ctrla); return 0; + } /** @@ -867,139 +733,42 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan, static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, u32 *residue) { - struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_desc *desc_first = atc_first_active(atchan); - struct at_desc *desc; + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct virt_dma_desc *vd; + struct at_desc *desc = NULL; u32 len, ctrla; - /* - * If the cookie doesn't match to the currently running transfer then - * we can return the total length of the associated DMA transfer, - * because it is still queued. 
- */ - desc = atc_get_desc_by_cookie(atchan, cookie); - if (desc == NULL) + vd = vchan_find_desc(&atchan->vc, cookie); + if (vd) + desc = to_atdma_desc(&vd->tx); + else if (atchan->desc && atchan->desc->vd.tx.cookie == cookie) + desc = atchan->desc; + + if (!desc) return -EINVAL; - else if (desc != desc_first) - return desc->total_len; - if (desc_first->lli.dscr) + if (desc->sg[0].lli->dscr) /* hardware linked list transfer */ - return atc_get_llis_residue(atchan, desc_first, residue); + return atc_get_llis_residue(atchan, desc, residue); /* single transfer */ - len = desc_first->total_len; + len = desc->total_len; ctrla = channel_readl(atchan, CTRLA); *residue = atc_calc_bytes_left(len, ctrla); return 0; } -/** - * atc_chain_complete - finish work for one transaction chain - * @atchan: channel we work on - * @desc: descriptor at the head of the chain we want do complete - */ -static void -atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) -{ - struct dma_async_tx_descriptor *txd = &desc->txd; - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); - unsigned long flags; - - dev_vdbg(chan2dev(&atchan->dma_chan), - "descriptor %u complete\n", txd->cookie); - - spin_lock_irqsave(&atchan->lock, flags); - - /* mark the descriptor as complete for non cyclic cases only */ - if (!atc_chan_is_cyclic(atchan)) - dma_cookie_complete(txd); - - spin_unlock_irqrestore(&atchan->lock, flags); - - dma_descriptor_unmap(txd); - /* for cyclic transfers, - * no need to replay callback function while stopping */ - if (!atc_chan_is_cyclic(atchan)) - dmaengine_desc_get_callback_invoke(txd, NULL); - - dma_run_dependencies(txd); - - spin_lock_irqsave(&atchan->lock, flags); - /* move children to free_list */ - list_splice_init(&desc->tx_list, &atchan->free_list); - /* add myself to free_list */ - list_add(&desc->desc_node, &atchan->free_list); - spin_unlock_irqrestore(&atchan->lock, flags); - - /* If the transfer was a memset, free our temporary buffer */ - if (desc->memset_buffer) { - dma_pool_free(atdma->memset_pool, desc->memset_vaddr, - desc->memset_paddr); - desc->memset_buffer = false; - } -} - -/** - * atc_advance_work - at the end of a transaction, move forward - * @atchan: channel where the transaction ended - */ -static void atc_advance_work(struct at_dma_chan *atchan) -{ - struct at_desc *desc; - unsigned long flags; - - dev_vdbg(chan2dev(&atchan->dma_chan), "advance_work\n"); - - spin_lock_irqsave(&atchan->lock, flags); - if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) - return spin_unlock_irqrestore(&atchan->lock, flags); - - desc = atc_first_active(atchan); - /* Remove the transfer node from the active list. */ - list_del_init(&desc->desc_node); - spin_unlock_irqrestore(&atchan->lock, flags); - atc_chain_complete(atchan, desc); - - /* advance work */ - spin_lock_irqsave(&atchan->lock, flags); - if (!list_empty(&atchan->active_list)) { - desc = atc_first_queued(atchan); - list_move_tail(&desc->desc_node, &atchan->active_list); - atc_dostart(atchan, desc); - } - spin_unlock_irqrestore(&atchan->lock, flags); -} - - /** * atc_handle_error - handle errors reported by DMA controller - * @atchan: channel where error occurs + * @atchan: channel where error occurs. 
+ * @i: channel index */ -static void atc_handle_error(struct at_dma_chan *atchan) +static void atc_handle_error(struct at_dma_chan *atchan, unsigned int i) { - struct at_desc *bad_desc; - struct at_desc *desc; - struct at_desc *child; - unsigned long flags; - - spin_lock_irqsave(&atchan->lock, flags); - /* - * The descriptor currently at the head of the active list is - * broked. Since we don't have any way to report errors, we'll - * just have to scream loudly and try to carry on. - */ - bad_desc = atc_first_active(atchan); - list_del_init(&bad_desc->desc_node); - - /* Try to restart the controller */ - if (!list_empty(&atchan->active_list)) { - desc = atc_first_queued(atchan); - list_move_tail(&desc->desc_node, &atchan->active_list); - atc_dostart(atchan, desc); - } + struct at_desc *desc = atchan->desc; - spin_unlock_irqrestore(&atchan->lock, flags); + /* Disable channel on AHB error */ + dma_writel(atchan->atdma, CHDR, AT_DMA_RES(i) | atchan->mask); /* * KERN_CRITICAL may seem harsh, but since this only happens @@ -1008,47 +777,37 @@ static void atc_handle_error(struct at_dma_chan *atchan) * controller flagged an error instead of scribbling over * random memory locations. */ - dev_crit(chan2dev(&atchan->dma_chan), - "Bad descriptor submitted for DMA!\n"); - dev_crit(chan2dev(&atchan->dma_chan), - " cookie: %d\n", bad_desc->txd.cookie); - atc_dump_lli(atchan, &bad_desc->lli); - list_for_each_entry(child, &bad_desc->tx_list, desc_node) - atc_dump_lli(atchan, &child->lli); - - /* Pretend the descriptor completed successfully */ - atc_chain_complete(atchan, bad_desc); -} - -/** - * atc_handle_cyclic - at the end of a period, run callback function - * @atchan: channel used for cyclic operations - */ -static void atc_handle_cyclic(struct at_dma_chan *atchan) -{ - struct at_desc *first = atc_first_active(atchan); - struct dma_async_tx_descriptor *txd = &first->txd; - - dev_vdbg(chan2dev(&atchan->dma_chan), - "new cyclic period llp 0x%08x\n", - channel_readl(atchan, DSCR)); - - dmaengine_desc_get_callback_invoke(txd, NULL); + dev_crit(chan2dev(&atchan->vc.chan), "Bad descriptor submitted for DMA!\n"); + dev_crit(chan2dev(&atchan->vc.chan), "cookie: %d\n", + desc->vd.tx.cookie); + for (i = 0; i < desc->sglen; i++) + atc_dump_lli(atchan, desc->sg[i].lli); } -/*-- IRQ & Tasklet ---------------------------------------------------*/ - -static void atc_tasklet(struct tasklet_struct *t) +static void atdma_handle_chan_done(struct at_dma_chan *atchan, u32 pending, + unsigned int i) { - struct at_dma_chan *atchan = from_tasklet(atchan, t, tasklet); + struct at_desc *desc; - if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status)) - return atc_handle_error(atchan); + spin_lock(&atchan->vc.lock); + desc = atchan->desc; - if (atc_chan_is_cyclic(atchan)) - return atc_handle_cyclic(atchan); + if (desc) { + if (pending & AT_DMA_ERR(i)) { + atc_handle_error(atchan, i); + /* Pretend the descriptor completed successfully */ + } - atc_advance_work(atchan); + if (atc_chan_is_cyclic(atchan)) { + vchan_cyclic_callback(&desc->vd); + } else { + vchan_cookie_complete(&desc->vd); + atchan->desc = NULL; + if (!(atc_chan_is_enabled(atchan))) + atc_dostart(atchan); + } + } + spin_unlock(&atchan->vc.lock); } static irqreturn_t at_dma_interrupt(int irq, void *dev_id) @@ -1073,17 +832,10 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) for (i = 0; i < atdma->dma_device.chancnt; i++) { atchan = &atdma->chan[i]; - if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) { - if (pending & AT_DMA_ERR(i)) { - /* Disable channel 
on AHB error */ - dma_writel(atdma, CHDR, - AT_DMA_RES(i) | atchan->mask); - /* Give information to tasklet */ - set_bit(ATC_IS_ERROR, &atchan->status); - } - tasklet_schedule(&atchan->tasklet); - ret = IRQ_HANDLED; - } + if (!(pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i)))) + continue; + atdma_handle_chan_done(atchan, pending, i); + ret = IRQ_HANDLED; } } while (pending); @@ -1091,35 +843,7 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) return ret; } - /*-- DMA Engine API --------------------------------------------------*/ - -/** - * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine - * @tx: descriptor at the head of the transaction chain - * - * Queue chain if DMA engine is working already - * - * Cookie increment and adding to active_list or queue must be atomic - */ -static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct at_desc *desc = txd_to_at_desc(tx); - struct at_dma_chan *atchan = to_at_dma_chan(tx->chan); - dma_cookie_t cookie; - unsigned long flags; - - spin_lock_irqsave(&atchan->lock, flags); - cookie = dma_cookie_assign(tx); - - list_add_tail(&desc->desc_node, &atchan->queue); - spin_unlock_irqrestore(&atchan->lock, flags); - - dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", - desc->txd.cookie); - return cookie; -} - /** * atc_prep_dma_interleaved - prepare memory to memory interleaved operation * @chan: the channel to prepare operation on @@ -1131,9 +855,12 @@ atc_prep_dma_interleaved(struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags) { + struct at_dma *atdma = to_at_dma(chan->device); struct at_dma_chan *atchan = to_at_dma_chan(chan); struct data_chunk *first; - struct at_desc *desc = NULL; + struct atdma_sg *atdma_sg; + struct at_desc *desc; + struct at_lli *lli; size_t xfer_count; unsigned int dwidth; u32 ctrla; @@ -1172,8 +899,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan, len += chunk->size; } - dwidth = atc_get_xfer_width(xt->src_start, - xt->dst_start, len); + dwidth = atc_get_xfer_width(xt->src_start, xt->dst_start, len); xfer_count = len >> dwidth; if (xfer_count > ATC_BTSIZE_MAX) { @@ -1190,32 +916,34 @@ atc_prep_dma_interleaved(struct dma_chan *chan, ATC_SRC_PIP | ATC_DST_PIP | FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM); - /* create the transfer */ - desc = atc_desc_get(atchan); - if (!desc) { - dev_err(chan2dev(chan), - "%s: couldn't allocate our descriptor\n", __func__); + desc = kzalloc(struct_size(desc, sg, 1), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = 1; + + atdma_sg = desc->sg; + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) { + kfree(desc); return NULL; } + lli = atdma_sg->lli; - desc->lli.saddr = xt->src_start; - desc->lli.daddr = xt->dst_start; - desc->lli.ctrla = ctrla | xfer_count; - desc->lli.ctrlb = ctrlb; + lli->saddr = xt->src_start; + lli->daddr = xt->dst_start; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; desc->boundary = first->size >> dwidth; desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1; desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1; - desc->txd.cookie = -EBUSY; - desc->total_len = desc->len = len; - - /* set end-of-link to the last link descriptor of list*/ - set_desc_eol(desc); - - desc->txd.flags = flags; /* client is in control of this ack */ + atdma_sg->len = len; + desc->total_len = len; - return &desc->txd; + set_lli_eol(desc, 0); + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); } /** @@ -1230,25 +958,32 @@ 
static struct dma_async_tx_descriptor * atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) { + struct at_dma *atdma = to_at_dma(chan->device); struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_desc *desc = NULL; - struct at_desc *first = NULL; - struct at_desc *prev = NULL; size_t xfer_count; size_t offset; + size_t sg_len; unsigned int src_width; unsigned int dst_width; + unsigned int i; u32 ctrla; u32 ctrlb; - dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n", - &dest, &src, len, flags); + dev_dbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n", + &dest, &src, len, flags); if (unlikely(!len)) { - dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + dev_err(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); return NULL; } + sg_len = DIV_ROUND_UP(len, ATC_BTSIZE_MAX); + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = sg_len; + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) | FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) | @@ -1263,50 +998,49 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, ctrla = FIELD_PREP(ATC_SRC_WIDTH, src_width) | FIELD_PREP(ATC_DST_WIDTH, dst_width); - for (offset = 0; offset < len; offset += xfer_count << src_width) { - xfer_count = min_t(size_t, (len - offset) >> src_width, - ATC_BTSIZE_MAX); + for (offset = 0, i = 0; offset < len; + offset += xfer_count << src_width, i++) { + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; - desc = atc_desc_get(atchan); - if (!desc) + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) goto err_desc_get; + lli = atdma_sg->lli; + + xfer_count = min_t(size_t, (len - offset) >> src_width, + ATC_BTSIZE_MAX); - desc->lli.saddr = src + offset; - desc->lli.daddr = dest + offset; - desc->lli.ctrla = ctrla | xfer_count; - desc->lli.ctrlb = ctrlb; + lli->saddr = src + offset; + lli->daddr = dest + offset; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; - desc->txd.cookie = 0; - desc->len = xfer_count << src_width; + desc->sg[i].len = xfer_count << src_width; - atc_desc_chain(&first, &prev, desc); + atdma_lli_chain(desc, i); } - /* First descriptor of the chain embedds additional information */ - first->txd.cookie = -EBUSY; - first->total_len = len; + desc->total_len = len; /* set end-of-link to the last link descriptor of list*/ - set_desc_eol(desc); - - first->txd.flags = flags; /* client is in control of this ack */ + set_lli_eol(desc, i - 1); - return &first->txd; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); err_desc_get: - atc_desc_put(atchan, first); + atdma_desc_free(&desc->vd); return NULL; } -static struct at_desc *atc_create_memset_desc(struct dma_chan *chan, - dma_addr_t psrc, - dma_addr_t pdst, - size_t len) +static int atdma_create_memset_lli(struct dma_chan *chan, + struct atdma_sg *atdma_sg, + dma_addr_t psrc, dma_addr_t pdst, size_t len) { - struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_desc *desc; + struct at_dma *atdma = to_at_dma(chan->device); + struct at_lli *lli; size_t xfer_count; - u32 ctrla = FIELD_PREP(ATC_SRC_WIDTH, 2) | FIELD_PREP(ATC_DST_WIDTH, 2); u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_FIXED) | @@ -1315,27 +1049,24 @@ static struct at_desc *atc_create_memset_desc(struct dma_chan *chan, xfer_count = len 
>> 2; if (xfer_count > ATC_BTSIZE_MAX) { - dev_err(chan2dev(chan), "%s: buffer is too big\n", - __func__); - return NULL; + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return -EINVAL; } - desc = atc_desc_get(atchan); - if (!desc) { - dev_err(chan2dev(chan), "%s: can't get a descriptor\n", - __func__); - return NULL; - } + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + return -ENOMEM; + lli = atdma_sg->lli; - desc->lli.saddr = psrc; - desc->lli.daddr = pdst; - desc->lli.ctrla = ctrla | xfer_count; - desc->lli.ctrlb = ctrlb; + lli->saddr = psrc; + lli->daddr = pdst; + lli->ctrla = ctrla | xfer_count; + lli->ctrlb = ctrlb; - desc->txd.cookie = 0; - desc->len = len; + atdma_sg->len = len; - return desc; + return 0; } /** @@ -1350,11 +1081,13 @@ static struct dma_async_tx_descriptor * atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags) { + struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); struct at_desc *desc; void __iomem *vaddr; dma_addr_t paddr; char fill_pattern; + int ret; dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__, &dest, value, len, flags); @@ -1385,27 +1118,28 @@ atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, (fill_pattern << 8) | fill_pattern; - desc = atc_create_memset_desc(chan, paddr, dest, len); - if (!desc) { - dev_err(chan2dev(chan), "%s: couldn't get a descriptor\n", - __func__); + desc = kzalloc(struct_size(desc, sg, 1), GFP_ATOMIC); + if (!desc) goto err_free_buffer; - } + desc->sglen = 1; + + ret = atdma_create_memset_lli(chan, desc->sg, paddr, dest, len); + if (ret) + goto err_free_desc; desc->memset_paddr = paddr; desc->memset_vaddr = vaddr; desc->memset_buffer = true; - desc->txd.cookie = -EBUSY; desc->total_len = len; /* set end-of-link on the descriptor */ - set_desc_eol(desc); - - desc->txd.flags = flags; + set_lli_eol(desc, 0); - return &desc->txd; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); +err_free_desc: + kfree(desc); err_free_buffer: dma_pool_free(atdma->memset_pool, vaddr, paddr); return NULL; @@ -1419,12 +1153,13 @@ atc_prep_dma_memset_sg(struct dma_chan *chan, { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - struct at_desc *desc = NULL, *first = NULL, *prev = NULL; + struct at_desc *desc; struct scatterlist *sg; void __iomem *vaddr; dma_addr_t paddr; size_t total_len = 0; int i; + int ret; dev_vdbg(chan2dev(chan), "%s: v0x%x l0x%zx f0x%lx\n", __func__, value, sg_len, flags); @@ -1443,6 +1178,11 @@ atc_prep_dma_memset_sg(struct dma_chan *chan, } *(u32*)vaddr = value; + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + goto err_free_dma_buf; + desc->sglen = sg_len; + for_each_sg(sgl, sg, sg_len, i) { dma_addr_t dest = sg_dma_address(sg); size_t len = sg_dma_len(sg); @@ -1453,38 +1193,33 @@ atc_prep_dma_memset_sg(struct dma_chan *chan, if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { dev_err(chan2dev(chan), "%s: buffer is not aligned\n", __func__); - goto err_put_desc; + goto err_free_desc; } - desc = atc_create_memset_desc(chan, paddr, dest, len); - if (!desc) - goto err_put_desc; - - atc_desc_chain(&first, &prev, desc); + ret = atdma_create_memset_lli(chan, &desc->sg[i], paddr, dest, + len); + if (ret) + goto err_free_desc; + atdma_lli_chain(desc, i); total_len += len; } - /* - * Only set the buffer pointers on the last descriptor to - * 
avoid free'ing while we have our transfer still going - */ desc->memset_paddr = paddr; desc->memset_vaddr = vaddr; desc->memset_buffer = true; - first->txd.cookie = -EBUSY; - first->total_len = total_len; + desc->total_len = total_len; /* set end-of-link on the descriptor */ - set_desc_eol(desc); - - first->txd.flags = flags; + set_lli_eol(desc, i - 1); - return &first->txd; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); -err_put_desc: - atc_desc_put(atchan, first); +err_free_desc: + atdma_desc_free(&desc->vd); +err_free_dma_buf: + dma_pool_free(atdma->memset_pool, vaddr, paddr); return NULL; } @@ -1502,11 +1237,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, unsigned long flags, void *context) { + struct at_dma *atdma = to_at_dma(chan->device); struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma_slave *atslave = chan->private; struct dma_slave_config *sconfig = &atchan->dma_sconfig; - struct at_desc *first = NULL; - struct at_desc *prev = NULL; + struct at_desc *desc; u32 ctrla; u32 ctrlb; dma_addr_t reg; @@ -1526,6 +1261,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, return NULL; } + desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC); + if (!desc) + return NULL; + desc->sglen = sg_len; + ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst); ctrlb = ATC_IEN; @@ -1542,13 +1282,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, FIELD_PREP(ATC_DIF, atchan->per_if); reg = sconfig->dst_addr; for_each_sg(sgl, sg, sg_len, i) { - struct at_desc *desc; + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; u32 len; u32 mem; - desc = atc_desc_get(atchan); - if (!desc) + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, + GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) goto err_desc_get; + lli = atdma_sg->lli; mem = sg_dma_address(sg); len = sg_dma_len(sg); @@ -1561,16 +1305,18 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (unlikely(mem & 3 || len & 3)) mem_width = 0; - desc->lli.saddr = mem; - desc->lli.daddr = reg; - desc->lli.ctrla = ctrla | - FIELD_PREP(ATC_SRC_WIDTH, mem_width) | - len >> mem_width; - desc->lli.ctrlb = ctrlb; - desc->len = len; + lli->saddr = mem; + lli->daddr = reg; + lli->ctrla = ctrla | + FIELD_PREP(ATC_SRC_WIDTH, mem_width) | + len >> mem_width; + lli->ctrlb = ctrlb; - atc_desc_chain(&first, &prev, desc); + atdma_sg->len = len; total_len += len; + + desc->sg[i].len = len; + atdma_lli_chain(desc, i); } break; case DMA_DEV_TO_MEM: @@ -1585,13 +1331,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg = sconfig->src_addr; for_each_sg(sgl, sg, sg_len, i) { - struct at_desc *desc; + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; u32 len; u32 mem; - desc = atc_desc_get(atchan); - if (!desc) + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, + GFP_NOWAIT, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) goto err_desc_get; + lli = atdma_sg->lli; mem = sg_dma_address(sg); len = sg_dma_len(sg); @@ -1604,16 +1354,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (unlikely(mem & 3 || len & 3)) mem_width = 0; - desc->lli.saddr = reg; - desc->lli.daddr = mem; - desc->lli.ctrla = ctrla | - FIELD_PREP(ATC_DST_WIDTH, mem_width) | - len >> reg_width; - desc->lli.ctrlb = ctrlb; - desc->len = len; + lli->saddr = reg; + lli->daddr = mem; + lli->ctrla = ctrla | + FIELD_PREP(ATC_DST_WIDTH, 
mem_width) | + len >> reg_width; + lli->ctrlb = ctrlb; - atc_desc_chain(&first, &prev, desc); + desc->sg[i].len = len; total_len += len; + + atdma_lli_chain(desc, i); } break; default: @@ -1621,21 +1372,16 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } /* set end-of-link to the last link descriptor of list*/ - set_desc_eol(prev); - - /* First descriptor of the chain embedds additional information */ - first->txd.cookie = -EBUSY; - first->total_len = total_len; + set_lli_eol(desc, i - 1); - /* first link descriptor of list is responsible of flags */ - first->txd.flags = flags; /* client is in control of this ack */ + desc->total_len = total_len; - return &first->txd; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); err_desc_get: dev_err(chan2dev(chan), "not enough descriptors available\n"); err: - atc_desc_put(atchan, first); + atdma_desc_free(&desc->vd); return NULL; } @@ -1665,54 +1411,59 @@ atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr, */ static int atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, - unsigned int period_index, dma_addr_t buf_addr, + unsigned int i, dma_addr_t buf_addr, unsigned int reg_width, size_t period_len, enum dma_transfer_direction direction) { + struct at_dma *atdma = to_at_dma(chan->device); struct at_dma_chan *atchan = to_at_dma_chan(chan); struct dma_slave_config *sconfig = &atchan->dma_sconfig; - u32 ctrla; + struct atdma_sg *atdma_sg = &desc->sg[i]; + struct at_lli *lli; - /* prepare common CRTLA value */ - ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | - FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) | - FIELD_PREP(ATC_DST_WIDTH, reg_width) | - FIELD_PREP(ATC_SRC_WIDTH, reg_width) | - period_len >> reg_width; + atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_ATOMIC, + &atdma_sg->lli_phys); + if (!atdma_sg->lli) + return -ENOMEM; + lli = atdma_sg->lli; switch (direction) { case DMA_MEM_TO_DEV: - desc->lli.saddr = buf_addr + (period_len * period_index); - desc->lli.daddr = sconfig->dst_addr; - desc->lli.ctrla = ctrla; - desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, - ATC_DST_ADDR_MODE_FIXED) | - FIELD_PREP(ATC_SRC_ADDR_MODE, - ATC_SRC_ADDR_MODE_INCR) | - FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | - FIELD_PREP(ATC_SIF, atchan->mem_if) | - FIELD_PREP(ATC_DIF, atchan->per_if); - desc->len = period_len; + lli->saddr = buf_addr + (period_len * i); + lli->daddr = sconfig->dst_addr; + lli->ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_INCR) | + FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) | + FIELD_PREP(ATC_SIF, atchan->mem_if) | + FIELD_PREP(ATC_DIF, atchan->per_if); + break; case DMA_DEV_TO_MEM: - desc->lli.saddr = sconfig->src_addr; - desc->lli.daddr = buf_addr + (period_len * period_index); - desc->lli.ctrla = ctrla; - desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, - ATC_DST_ADDR_MODE_INCR) | - FIELD_PREP(ATC_SRC_ADDR_MODE, - ATC_SRC_ADDR_MODE_FIXED) | - FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | - FIELD_PREP(ATC_SIF, atchan->per_if) | - FIELD_PREP(ATC_DIF, atchan->mem_if); - desc->len = period_len; + lli->saddr = sconfig->src_addr; + lli->daddr = buf_addr + (period_len * i); + lli->ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE, + ATC_DST_ADDR_MODE_INCR) | + FIELD_PREP(ATC_SRC_ADDR_MODE, + ATC_SRC_ADDR_MODE_FIXED) | + FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) | + FIELD_PREP(ATC_SIF, atchan->per_if) | + FIELD_PREP(ATC_DIF, atchan->mem_if); break; default: return -EINVAL; } + lli->ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) | + 
FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) | + FIELD_PREP(ATC_DST_WIDTH, reg_width) | + FIELD_PREP(ATC_SRC_WIDTH, reg_width) | + period_len >> reg_width; + desc->sg[i].len = period_len; + return 0; } @@ -1733,8 +1484,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma_slave *atslave = chan->private; struct dma_slave_config *sconfig = &atchan->dma_sconfig; - struct at_desc *first = NULL; - struct at_desc *prev = NULL; + struct at_desc *desc; unsigned long was_cyclic; unsigned int reg_width; unsigned int periods = buf_len / period_len; @@ -1768,33 +1518,26 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, if (atc_dma_cyclic_check_values(reg_width, buf_addr, period_len)) goto err_out; + desc = kzalloc(struct_size(desc, sg, periods), GFP_ATOMIC); + if (!desc) + goto err_out; + desc->sglen = periods; + /* build cyclic linked list */ for (i = 0; i < periods; i++) { - struct at_desc *desc; - - desc = atc_desc_get(atchan); - if (!desc) - goto err_desc_get; - if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr, reg_width, period_len, direction)) - goto err_desc_get; - - atc_desc_chain(&first, &prev, desc); + goto err_fill_desc; + atdma_lli_chain(desc, i); } - + desc->total_len = buf_len; /* lets make a cyclic list */ - prev->lli.dscr = first->txd.phys; - - /* First descriptor of the chain embedds additional information */ - first->txd.cookie = -EBUSY; - first->total_len = buf_len; + desc->sg[i - 1].lli->dscr = desc->sg[0].lli_phys; - return &first->txd; + return vchan_tx_prep(&atchan->vc, &desc->vd, flags); -err_desc_get: - dev_err(chan2dev(chan), "not enough descriptors available\n"); - atc_desc_put(atchan, first); +err_fill_desc: + atdma_desc_free(&desc->vd); err_out: clear_bit(ATC_IS_CYCLIC, &atchan->status); return NULL; @@ -1823,17 +1566,17 @@ static int atc_pause(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->dma_chan.chan_id; + int chan_id = atchan->vc.chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); - spin_lock_irqsave(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id)); set_bit(ATC_IS_PAUSED, &atchan->status); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_unlock_irqrestore(&atchan->vc.lock, flags); return 0; } @@ -1842,7 +1585,7 @@ static int atc_resume(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->dma_chan.chan_id; + int chan_id = atchan->vc.chan.chan_id; unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1850,12 +1593,12 @@ static int atc_resume(struct dma_chan *chan) if (!atc_chan_is_paused(atchan)) return 0; - spin_lock_irqsave(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); dma_writel(atdma, CHDR, AT_DMA_RES(chan_id)); clear_bit(ATC_IS_PAUSED, &atchan->status); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_unlock_irqrestore(&atchan->vc.lock, flags); return 0; } @@ -1864,9 +1607,11 @@ static int atc_terminate_all(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->dma_chan.chan_id; + int chan_id = atchan->vc.chan.chan_id; unsigned long flags; + LIST_HEAD(list); + dev_vdbg(chan2dev(chan), "%s\n", __func__); /* @@ 
-1875,7 +1620,7 @@ static int atc_terminate_all(struct dma_chan *chan) * channel. We still have to poll the channel enable bit due * to AHB/HSB limitations. */ - spin_lock_irqsave(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); /* disabling channel: must also remove suspend state */ dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask); @@ -1884,15 +1629,20 @@ static int atc_terminate_all(struct dma_chan *chan) while (dma_readl(atdma, CHSR) & atchan->mask) cpu_relax(); - /* active_list entries will end up before queued entries */ - list_splice_tail_init(&atchan->queue, &atchan->free_list); - list_splice_tail_init(&atchan->active_list, &atchan->free_list); + if (atchan->desc) { + vchan_terminate_vdesc(&atchan->desc->vd); + atchan->desc = NULL; + } + + vchan_get_all_descriptors(&atchan->vc, &list); clear_bit(ATC_IS_PAUSED, &atchan->status); /* if channel dedicated to cyclic operations, free it */ clear_bit(ATC_IS_CYCLIC, &atchan->status); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_unlock_irqrestore(&atchan->vc.lock, flags); + + vchan_dma_desc_free_list(&atchan->vc, &list); return 0; } @@ -1922,9 +1672,10 @@ atc_tx_status(struct dma_chan *chan, if (dma_status == DMA_COMPLETE || !txstate) return dma_status; - spin_lock_irqsave(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); + /* Get number of bytes left in the active transactions */ ret = atc_get_residue(chan, cookie, &residue); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_unlock_irqrestore(&atchan->vc.lock, flags); if (unlikely(ret < 0)) { dev_vdbg(chan2dev(chan), "get residual bytes error\n"); @@ -1939,27 +1690,17 @@ atc_tx_status(struct dma_chan *chan, return dma_status; } -/** - * atc_issue_pending - takes the first transaction descriptor in the pending - * queue and starts the transfer. 
- * @chan: target DMA channel - */ static void atc_issue_pending(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_desc *desc; unsigned long flags; - dev_vdbg(chan2dev(chan), "issue_pending\n"); - - spin_lock_irqsave(&atchan->lock, flags); - if (atc_chan_is_enabled(atchan) || list_empty(&atchan->queue)) - return spin_unlock_irqrestore(&atchan->lock, flags); - - desc = atc_first_queued(atchan); - list_move_tail(&desc->desc_node, &atchan->active_list); - atc_dostart(atchan, desc); - spin_unlock_irqrestore(&atchan->lock, flags); + spin_lock_irqsave(&atchan->vc.lock, flags); + if (vchan_issue_pending(&atchan->vc) && !atchan->desc) { + if (!(atc_chan_is_enabled(atchan))) + atc_dostart(atchan); + } + spin_unlock_irqrestore(&atchan->vc.lock, flags); } /** @@ -1972,9 +1713,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - struct at_desc *desc; struct at_dma_slave *atslave; - int i; u32 cfg; dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); @@ -1985,11 +1724,6 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) return -EIO; } - if (!list_empty(&atchan->free_list)) { - dev_dbg(chan2dev(chan), "can't allocate channel resources (channel not freed from a previous use)\n"); - return -EIO; - } - cfg = ATC_DEFAULT_CFG; atslave = chan->private; @@ -2005,26 +1739,10 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) cfg = atslave->cfg; } - /* Allocate initial pool of descriptors */ - for (i = 0; i < init_nr_desc_per_channel; i++) { - desc = atc_alloc_descriptor(chan, GFP_KERNEL); - if (!desc) { - dev_err(atdma->dma_device.dev, - "Only %d initial descriptors\n", i); - break; - } - list_add_tail(&desc->desc_node, &atchan->free_list); - } - - dma_cookie_init(chan); - /* channel parameters */ channel_writel(atchan, CFG, cfg); - dev_dbg(chan2dev(chan), - "alloc_chan_resources: allocated %d descriptors\n", i); - - return i; + return 0; } /** @@ -2034,22 +1752,10 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) static void atc_free_chan_resources(struct dma_chan *chan) { struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_dma *atdma = to_at_dma(chan->device); - struct at_desc *desc, *_desc; - LIST_HEAD(list); - /* ASSERT: channel is idle */ - BUG_ON(!list_empty(&atchan->active_list)); - BUG_ON(!list_empty(&atchan->queue)); BUG_ON(atc_chan_is_enabled(atchan)); - list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) { - dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); - list_del(&desc->desc_node); - /* free link descriptor */ - dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys); - } - list_splice_init(&atchan->free_list, &list); + vchan_free_chan_resources(to_virt_chan(chan)); atchan->status = 0; /* @@ -2274,11 +1980,11 @@ static int __init at_dma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, atdma); /* create a pool of consistent memory blocks for hardware descriptors */ - atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool", - &pdev->dev, sizeof(struct at_desc), - 4 /* word alignment */, 0); - if (!atdma->dma_desc_pool) { - dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); + atdma->lli_pool = dma_pool_create("at_hdmac_lli_pool", + &pdev->dev, sizeof(struct at_lli), + 4 /* word alignment */, 0); + if (!atdma->lli_pool) { + dev_err(&pdev->dev, "Unable to allocate DMA LLI descriptor pool\n"); err = -ENOMEM; goto err_desc_pool_create; } @@ -2303,20 
+2009,13 @@ static int __init at_dma_probe(struct platform_device *pdev) atchan->mem_if = AT_DMA_MEM_IF; atchan->per_if = AT_DMA_PER_IF; - atchan->dma_chan.device = &atdma->dma_device; - dma_cookie_init(&atchan->dma_chan); - list_add_tail(&atchan->dma_chan.device_node, - &atdma->dma_device.channels); atchan->ch_regs = atdma->regs + ch_regs(i); - spin_lock_init(&atchan->lock); atchan->mask = 1 << i; - INIT_LIST_HEAD(&atchan->active_list); - INIT_LIST_HEAD(&atchan->queue); - INIT_LIST_HEAD(&atchan->free_list); - - tasklet_setup(&atchan->tasklet, atc_tasklet); + atchan->atdma = atdma; + atchan->vc.desc_free = atdma_desc_free; + vchan_init(&atchan->vc, &atdma->dma_device); atc_enable_chan_irq(atdma, i); } @@ -2390,7 +2089,7 @@ static int __init at_dma_probe(struct platform_device *pdev) err_dma_async_device_register: dma_pool_destroy(atdma->memset_pool); err_memset_pool_create: - dma_pool_destroy(atdma->dma_desc_pool); + dma_pool_destroy(atdma->lli_pool); err_desc_pool_create: free_irq(platform_get_irq(pdev, 0), atdma); err_irq: @@ -2409,17 +2108,13 @@ static int at_dma_remove(struct platform_device *pdev) dma_async_device_unregister(&atdma->dma_device); dma_pool_destroy(atdma->memset_pool); - dma_pool_destroy(atdma->dma_desc_pool); + dma_pool_destroy(atdma->lli_pool); free_irq(platform_get_irq(pdev, 0), atdma); list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels, device_node) { - struct at_dma_chan *atchan = to_at_dma_chan(chan); - /* Disable interrupts */ atc_disable_chan_irq(atdma, chan->chan_id); - - tasklet_kill(&atchan->tasklet); list_del(&chan->device_node); } @@ -2453,7 +2148,7 @@ static int at_dma_prepare(struct device *dev) static void atc_suspend_cyclic(struct at_dma_chan *atchan) { - struct dma_chan *chan = &atchan->dma_chan; + struct dma_chan *chan = &atchan->vc.chan; /* Channel should be paused by user * do it anyway even if it is not done already */ @@ -2494,7 +2189,7 @@ static int at_dma_suspend_noirq(struct device *dev) static void atc_resume_cyclic(struct at_dma_chan *atchan) { - struct at_dma *atdma = to_at_dma(atchan->dma_chan.device); + struct at_dma *atdma = to_at_dma(atchan->vc.chan.device); /* restore channel status for cyclic descriptors list: * next descriptor in the cyclic list at the time of suspend */ @@ -2568,5 +2263,6 @@ module_exit(at_dma_exit); MODULE_DESCRIPTION("Atmel AHB DMA Controller driver"); MODULE_AUTHOR("Nicolas Ferre "); +MODULE_AUTHOR("Tudor Ambarus "); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:at_hdmac"); -- GitLab From c5e7ee72862eeeee77fd71b99fa9064f830e0b00 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 23 Jul 2021 12:15:59 -0400 Subject: [PATCH 193/694] Revert "csky: Fixup CONFIG_DEBUG_RSEQ" This reverts commit f36e0aab6f1f78d770ce859df3f07a9c5763ce5f. The csky rseq support has been merged without ever notifying the rseq maintainers, and without any of the required asssembler glue in the rseq selftests, which means it is entirely untested. It is also derived from a non-upstream riscv patch which has known bugs. The assembly part of this revert should be carefully reviewed by the architecture maintainer because it touches code which has changed since the merge of the reverted patch. The rseq selftests assembly glue should be introduced at the same time as the architecture rseq support. Without the presence of any test, I recommend reverting rseq support from csky for now. 
Link: https://lore.kernel.org/lkml/1257037909.25426.1626705790861.JavaMail.zimbra@efficios.com/ Signed-off-by: Mathieu Desnoyers Signed-off-by: Guo Ren Cc: Al Viro Cc: Linus Torvalds Cc: linux-csky@vger.kernel.org --- arch/csky/kernel/entry.S | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index 547b4cd1b24b4..a4b519fbb371f 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -50,11 +50,15 @@ ENTRY(csky_systemcall) SAVE_ALL TRAP0_SIZE zero_fp context_tracking +#ifdef CONFIG_RSEQ_DEBUG + mov a0, sp + jbsr rseq_syscall +#endif psrset ee, ie lrw r9, __NR_syscalls cmphs syscallid, r9 /* Check nr of syscall */ - bt 1f + bt ret_from_exception lrw r9, sys_call_table ixw r9, syscallid @@ -80,11 +84,6 @@ ENTRY(csky_systemcall) jsr syscallid #endif stw a0, (sp, LSAVE_A0) /* Save return value */ -1: -#ifdef CONFIG_DEBUG_RSEQ - mov a0, sp - jbsr rseq_syscall -#endif jmpi ret_from_exception csky_syscall_trace: @@ -113,10 +112,6 @@ csky_syscall_trace: stw a0, (sp, LSAVE_A0) /* Save return value */ 1: -#ifdef CONFIG_DEBUG_RSEQ - mov a0, sp - jbsr rseq_syscall -#endif mov a0, sp /* right now, sp --> pt_regs */ jbsr syscall_trace_exit br ret_from_exception -- GitLab From 7e2004906fb52257772be0ef262fba2d5eb1653b Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 23 Jul 2021 12:16:00 -0400 Subject: [PATCH 194/694] Revert "csky: Add support for restartable sequence" This reverts commit 9866d141a0977ace974400bf1f793dfc163409ce. The csky rseq support has been merged without ever notifying the rseq maintainers, and without any of the required asssembler glue in the rseq selftests, which means it is entirely untested. It is also derived from a non-upstream riscv patch which has known bugs. The assembly part of this revert should be carefully reviewed by the architecture maintainer because it touches code which has changed since the merge of the reverted patch. The rseq selftests assembly glue should be introduced at the same time as the architecture rseq support. Without the presence of any test, I recommend reverting rseq support from csky for now. 
Link: https://lore.kernel.org/lkml/1257037909.25426.1626705790861.JavaMail.zimbra@efficios.com/ Signed-off-by: Mathieu Desnoyers Signed-off-by: Guo Ren Cc: Al Viro Cc: Linus Torvalds Cc: linux-csky@vger.kernel.org --- arch/csky/Kconfig | 1 - arch/csky/kernel/entry.S | 4 ---- arch/csky/kernel/signal.c | 2 -- 3 files changed, 7 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index e0ecd1cc81a94..dba02da6fa344 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -94,7 +94,6 @@ config CSKY select HAVE_PERF_USER_STACK_DUMP select HAVE_DMA_CONTIGUOUS select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select MAY_HAVE_SPARSE_IRQ diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a4b519fbb371f..c68cdcc76d60e 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -50,10 +50,6 @@ ENTRY(csky_systemcall) SAVE_ALL TRAP0_SIZE zero_fp context_tracking -#ifdef CONFIG_RSEQ_DEBUG - mov a0, sp - jbsr rseq_syscall -#endif psrset ee, ie lrw r9, __NR_syscalls diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index b7b3685283d76..10da0fefd4319 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -179,8 +179,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) sigset_t *oldset = sigmask_to_save(); int ret; - rseq_signal_deliver(ksig, regs); - /* Are we from a system call? */ if (in_syscall(regs)) { /* Avoid additional syscall restarting via ret_from_exception */ -- GitLab From dc901d98b1fe6e52ab81cd3e0879379168e06daa Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 10 Nov 2022 17:27:15 -0800 Subject: [PATCH 195/694] dmaengine: idxd: Fix crc_val field for completion record The crc_val in the completion record should be 64 bits and not 32 bits. Fixes: 4ac823e9cd85 ("dmaengine: idxd: fix delta_rec and crc size field for completion record") Reported-by: Nirav N Shah Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20221111012715.2031481-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 2b9e7feba3f32..1d553bedbdb51 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -295,7 +295,7 @@ struct dsa_completion_record { }; uint32_t delta_rec_size; - uint32_t crc_val; + uint64_t crc_val; /* DIF check & strip */ struct { -- GitLab From 22c354cf3fec6aa52cf2df6685b33ce5f265edf8 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 9 Nov 2022 12:12:27 +0100 Subject: [PATCH 196/694] dt-bindings: dmaengine: qcom: gpi: add compatible for SM6375 Document the compatible for GPI DMA controller on SM6375 SoC. 
Signed-off-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221109111236.46003-3-konrad.dybcio@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 232895fa1d8d2..e7ba1c47a88ea 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -26,6 +26,7 @@ properties: - enum: - qcom,sc7280-gpi-dma - qcom,sm6115-gpi-dma + - qcom,sm6375-gpi-dma - qcom,sm8350-gpi-dma - qcom,sm8450-gpi-dma - const: qcom,sm6350-gpi-dma -- GitLab From 444eef7d5695393f214d83180f3e4bb99621cd07 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Nov 2022 21:34:02 +0100 Subject: [PATCH 197/694] dmaengine: idxd: Remove linux/msi.h include Nothing in this file needs anything from linux/msi.h Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Link: https://lore.kernel.org/r/20221113202428.573536003@linutronix.de Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 6f44fa8f78a5d..06f5d3783d771 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "../dmaengine.h" #include "idxd.h" -- GitLab From d57b2a65cde743a490a848236641fe9aa5536a9b Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 10 Nov 2022 22:47:46 +0530 Subject: [PATCH 198/694] dt-bindings: dmaengine: Add dma-channel-mask to Tegra GPCDMA Add dma-channel-mask property in Tegra GPCDMA document. The property would help to specify the channels to be used in kernel and reserve few for the firmware. This was previously achieved by limiting the channel number to 31 in the driver. This is wrong and does not align with the hardware. Correct this and set the max interrupts to 32. Signed-off-by: Akhil R Acked-by: Thierry Reding Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221110171748.40304-2-akhilrajeev@nvidia.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml b/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml index c8894476b6abf..851bd50ee67fe 100644 --- a/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml +++ b/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml @@ -39,7 +39,7 @@ properties: Should contain all of the per-channel DMA interrupts in ascending order with respect to the DMA channel index. minItems: 1 - maxItems: 31 + maxItems: 32 resets: maxItems: 1 @@ -52,6 +52,9 @@ properties: dma-coherent: true + dma-channel-mask: + maxItems: 1 + required: - compatible - reg @@ -60,6 +63,7 @@ required: - reset-names - "#dma-cells" - iommus + - dma-channel-mask additionalProperties: false @@ -108,5 +112,6 @@ examples: #dma-cells = <1>; iommus = <&smmu TEGRA186_SID_GPCDMA_0>; dma-coherent; + dma-channel-mask = <0xfffffffe>; }; ... 
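For context before the driver change that follows: handling an optional "dma-channel-mask" boils down to reading the property, falling back to a default mask that keeps channel 0 for firmware, and skipping every channel whose bit is clear. Below is a minimal sketch of that pattern, assuming hypothetical names (my_dma_init_channels, MY_DEFAULT_CHAN_MASK, MY_NR_CHANNELS); it illustrates the approach only and is not the Tegra driver code.

        /*
         * Sketch only: honour an optional "dma-channel-mask" property.
         * All identifiers here are illustrative stand-ins.
         */
        #include <linux/bitops.h>
        #include <linux/device.h>
        #include <linux/property.h>

        #define MY_DEFAULT_CHAN_MASK  0xfffffffeU  /* assume channel 0 is reserved for firmware */
        #define MY_NR_CHANNELS        32U

        static int my_dma_init_channels(struct device *dev, u32 *chan_mask)
        {
                unsigned int i;
                int ret;

                /* The property is optional; keep the old behaviour when absent. */
                ret = device_property_read_u32(dev, "dma-channel-mask", chan_mask);
                if (ret)
                        *chan_mask = MY_DEFAULT_CHAN_MASK;

                for (i = 0; i < MY_NR_CHANNELS; i++) {
                        /* Skip channels reserved for the firmware. */
                        if (!(*chan_mask & BIT(i)))
                                continue;

                        /* Per-channel setup (IRQ lookup, register base, ...) goes here. */
                }

                dev_info(dev, "using %u of %u DMA channels\n",
                         hweight32(*chan_mask), MY_NR_CHANNELS);
                return 0;
        }

Keeping the fallback mask in the driver preserves the previous behaviour for device trees that predate the new property.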
-- GitLab From 3a0c95b61385f583424f44e79c15f1bdf050776d Mon Sep 17 00:00:00 2001 From: Akhil R Date: Thu, 10 Nov 2022 22:47:48 +0530 Subject: [PATCH 199/694] dmaengine: tegra: Add support for dma-channel-mask Add support for dma-channel-mask so that only the specified channels are used. This helps to reserve some channels for the firmware. This was initially achieved by limiting the channel number to 31 in the driver and adjusting the register address to skip channel0 which was reserved for a firmware. This is wrong and does not align with the hardware. Now, with this change, the driver can align more to the actual hardware which has 32 channels. But this implies that there will be a break in the ABI and the device tree need to be updated along with this change for the driver to pickup the right interrupt corresponding to the channel Reviewed-by: Jon Hunter Link: https://lore.kernel.org/all/Y2EFoG1H9YpfxRjs@orome/ Signed-off-by: Akhil R Link: https://lore.kernel.org/r/20221110171748.40304-4-akhilrajeev@nvidia.com Signed-off-by: Vinod Koul --- drivers/dma/tegra186-gpc-dma.c | 37 +++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c index fa9bda4a2bc6f..1d1180db6d4ec 100644 --- a/drivers/dma/tegra186-gpc-dma.c +++ b/drivers/dma/tegra186-gpc-dma.c @@ -161,7 +161,10 @@ #define TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT 5000 /* 5 msec */ /* Channel base address offset from GPCDMA base address */ -#define TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET 0x20000 +#define TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET 0x10000 + +/* Default channel mask reserving channel0 */ +#define TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK 0xfffffffe struct tegra_dma; struct tegra_dma_channel; @@ -246,6 +249,7 @@ struct tegra_dma { const struct tegra_dma_chip_data *chip_data; unsigned long sid_m2d_reserved; unsigned long sid_d2m_reserved; + u32 chan_mask; void __iomem *base_addr; struct device *dev; struct dma_device dma_dev; @@ -1288,7 +1292,7 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, } static const struct tegra_dma_chip_data tegra186_dma_chip_data = { - .nr_channels = 31, + .nr_channels = 32, .channel_reg_size = SZ_64K, .max_dma_count = SZ_1G, .hw_support_pause = false, @@ -1296,7 +1300,7 @@ static const struct tegra_dma_chip_data tegra186_dma_chip_data = { }; static const struct tegra_dma_chip_data tegra194_dma_chip_data = { - .nr_channels = 31, + .nr_channels = 32, .channel_reg_size = SZ_64K, .max_dma_count = SZ_1G, .hw_support_pause = true, @@ -1304,7 +1308,7 @@ static const struct tegra_dma_chip_data tegra194_dma_chip_data = { }; static const struct tegra_dma_chip_data tegra234_dma_chip_data = { - .nr_channels = 31, + .nr_channels = 32, .channel_reg_size = SZ_64K, .max_dma_count = SZ_1G, .hw_support_pause = true, @@ -1380,15 +1384,28 @@ static int tegra_dma_probe(struct platform_device *pdev) } stream_id = iommu_spec->ids[0] & 0xffff; + ret = device_property_read_u32(&pdev->dev, "dma-channel-mask", + &tdma->chan_mask); + if (ret) { + dev_warn(&pdev->dev, + "Missing dma-channel-mask property, using default channel mask %#x\n", + TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK); + tdma->chan_mask = TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK; + } + INIT_LIST_HEAD(&tdma->dma_dev.channels); for (i = 0; i < cdata->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + /* Check for channel mask */ + if (!(tdma->chan_mask & BIT(i))) + continue; + tdc->irq = platform_get_irq(pdev, i); if (tdc->irq < 0) return 
tdc->irq; - tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET + + tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET + i * cdata->channel_reg_size; snprintf(tdc->name, sizeof(tdc->name), "gpcdma.%d", i); tdc->tdma = tdma; @@ -1449,8 +1466,8 @@ static int tegra_dma_probe(struct platform_device *pdev) return ret; } - dev_info(&pdev->dev, "GPC DMA driver register %d channels\n", - cdata->nr_channels); + dev_info(&pdev->dev, "GPC DMA driver register %lu channels\n", + hweight_long(tdma->chan_mask)); return 0; } @@ -1473,6 +1490,9 @@ static int __maybe_unused tegra_dma_pm_suspend(struct device *dev) for (i = 0; i < tdma->chip_data->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + if (!(tdma->chan_mask & BIT(i))) + continue; + if (tdc->dma_desc) { dev_err(tdma->dev, "channel %u busy\n", i); return -EBUSY; @@ -1492,6 +1512,9 @@ static int __maybe_unused tegra_dma_pm_resume(struct device *dev) for (i = 0; i < tdma->chip_data->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; + if (!(tdma->chan_mask & BIT(i))) + continue; + tegra_dma_program_sid(tdc, tdc->stream_id); } -- GitLab From 6313f4b5a438023c0d20960f19df16483cbbb8d7 Mon Sep 17 00:00:00 2001 From: Melody Olvera Date: Wed, 26 Oct 2022 12:05:33 -0700 Subject: [PATCH 200/694] dt-bindings: arm-smmu: Add 'compatible' for QDU1000 and QRU1000 Add compatible bindings for Qualcomm QDU1000 and QRU1000 platforms. Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20221026190534.4004945-2-quic_molvera@quicinc.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 9066e6df1ba10..6258302ed9e75 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -34,6 +34,7 @@ properties: items: - enum: - qcom,qcm2290-smmu-500 + - qcom,qdu1000-smmu-500 - qcom,sc7180-smmu-500 - qcom,sc7280-smmu-500 - qcom,sc8180x-smmu-500 -- GitLab From 7b52f53ce1914f5b3542665ff6a373ee858161c9 Mon Sep 17 00:00:00 2001 From: Melody Olvera Date: Wed, 26 Oct 2022 12:05:34 -0700 Subject: [PATCH 201/694] drivers: arm-smmu-impl: Add QDU1000 and QRU1000 iommu implementation Add compatible for Qualcomm QDU1000 and QRU1000 SoCs to add iommu support for them. 
Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20221026190534.4004945-3-quic_molvera@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index b2708de25ea34..0580a381a04ba 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -426,6 +426,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,qcm2290-smmu-500" }, + { .compatible = "qcom,qdu1000-smmu-500" }, { .compatible = "qcom,sc7180-smmu-500" }, { .compatible = "qcom,sc7280-smmu-500" }, { .compatible = "qcom,sc8180x-smmu-500" }, -- GitLab From 728b22a57232a1738d87b2148908a81615240a37 Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Sun, 30 Oct 2022 11:42:54 +0200 Subject: [PATCH 202/694] dt-bindings: arm-smmu: Add compatible for Qualcomm SM6115 Add compatible for the Qualcomm SM6115 platform to the ARM SMMU DeviceTree binding. Signed-off-by: Adam Skladowski Signed-off-by: Iskren Chernev Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221030094258.486428-5-iskren.chernev@gmail.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 6258302ed9e75..406b98d090f62 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -42,6 +42,7 @@ properties: - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 + - qcom,sm6115-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - qcom,sm8150-smmu-500 -- GitLab From 2fd6e1ad7e199c1ef54341a54fa4e11edc31b63c Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Sun, 30 Oct 2022 11:42:55 +0200 Subject: [PATCH 203/694] iommu/arm-smmu-qcom: Add SM6115 support Add the Qualcomm SM6115 platform to the list of compatible, this target uses MMU500 for both APSS and GPU. Signed-off-by: Adam Skladowski Signed-off-by: Iskren Chernev Link: https://lore.kernel.org/r/20221030094258.486428-6-iskren.chernev@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 0580a381a04ba..0f4eaf217983d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -433,6 +433,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8280xp-smmu-500" }, { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, + { .compatible = "qcom,sm6115-smmu-500" }, { .compatible = "qcom,sm6125-smmu-500" }, { .compatible = "qcom,sm6350-smmu-500" }, { .compatible = "qcom,sm6375-smmu-500" }, -- GitLab From 8d3a9ec6ae2886305fdd03652592b3c7ffea672b Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Thu, 10 Nov 2022 19:36:06 -0500 Subject: [PATCH 204/694] dt-bindings: iommu: arm-smmu: add sdm670 compatible The Snapdragon 670 needs the IOMMU for GENI I2C. Add a compatible string in the documentation to represent its support. 
Signed-off-by: Richard Acayan Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221111003606.126795-2-mailingradian@gmail.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 406b98d090f62..3ade2dbca70ee 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -39,6 +39,7 @@ properties: - qcom,sc7280-smmu-500 - qcom,sc8180x-smmu-500 - qcom,sc8280xp-smmu-500 + - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 -- GitLab From dbf88f74358338cc444933346a0a57635fbb4c94 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:26 +0300 Subject: [PATCH 205/694] dt-bindings: arm-smmu: Add missing Qualcomm SMMU compatibles Add missing compatibles used for Adreno SMMU on sc7280 and sm8450 platforms and for the Qualcomm v2 SMMU used on SDM630 platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-2-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 3ade2dbca70ee..aa863811996f1 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -28,6 +28,7 @@ properties: - enum: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 + - qcom,sdm630-smmu-v2 - const: qcom,smmu-v2 - description: Qcom SoCs implementing "arm,mmu-500" @@ -51,10 +52,20 @@ properties: - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 - const: arm,mmu-500 + + - description: Qcom Adreno GPUs implementing "arm,smmu-500" + items: + - enum: + - qcom,sc7280-smmu-500 + - qcom,sm8250-smmu-500 + - const: qcom,adreno-smmu + - const: arm,mmu-500 - description: Qcom Adreno GPUs implementing "arm,smmu-v2" items: - enum: + - qcom,msm8996-smmu-v2 - qcom,sc7180-smmu-v2 + - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 -- GitLab From 982295bfe36925919ab61aab0657528541a2aa83 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:27 +0300 Subject: [PATCH 206/694] dt-bindings: arm-smmu: fix clocks/clock-names schema Rework clocks/clock-names properties schema to properly describe possible usage cases. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-3-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 129 ++++++++++++++++-- 1 file changed, 121 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index aa863811996f1..99f34a40cfe20 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -161,16 +161,12 @@ properties: present in such cases. 
clock-names: - items: - - const: bus - - const: iface + minItems: 1 + maxItems: 7 clocks: - items: - - description: bus clock required for downstream bus access and for the - smmu ptw - - description: interface clock required to access smmu's registers - through the TCU's programming interface. + minItems: 1 + maxItems: 7 power-domains: maxItems: 1 @@ -220,6 +216,123 @@ allOf: reg: maxItems: 1 + - if: + properties: + compatible: + contains: + enum: + - qcom,msm8998-smmu-v2 + - qcom,sdm630-smmu-v2 + then: + anyOf: + - properties: + clock-names: + items: + - const: bus + clocks: + items: + - description: bus clock required for downstream bus access and for + the smmu ptw + - properties: + clock-names: + items: + - const: iface + - const: mem + - const: mem_iface + clocks: + items: + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + - description: bus clock required for memory access + - description: bus clock required for GPU memory access + - properties: + clock-names: + items: + - const: iface-mm + - const: iface-smmu + - const: bus-mm + - const: bus-smmu + clocks: + items: + - description: interface clock required to access mnoc's registers + through the TCU's programming interface. + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + - description: bus clock required for downstream bus access + - description: bus clock required for the smmu ptw + + - if: + properties: + compatible: + contains: + enum: + - qcom,msm8996-smmu-v2 + - qcom,sc7180-smmu-v2 + - qcom,sdm845-smmu-v2 + then: + properties: + clock-names: + items: + - const: bus + - const: iface + + clocks: + items: + - description: bus clock required for downstream bus access and for + the smmu ptw + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + + - if: + properties: + compatible: + contains: + const: qcom,sc7280-smmu-500 + then: + properties: + clock-names: + items: + - const: gcc_gpu_memnoc_gfx_clk + - const: gcc_gpu_snoc_dvm_gfx_clk + - const: gpu_cc_ahb_clk + - const: gpu_cc_hlos1_vote_gpu_smmu_clk + - const: gpu_cc_cx_gmu_clk + - const: gpu_cc_hub_cx_int_clk + - const: gpu_cc_hub_aon_clk + + clocks: + items: + - description: GPU memnoc_gfx clock + - description: GPU snoc_dvm_gfx clock + - description: GPU ahb clock + - description: GPU hlos1_vote_GPU smmu clock + - description: GPU cx_gmu clock + - description: GPU hub_cx_int clock + - description: GPU hub_aon clock + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8150-smmu-500 + - qcom,sm8250-smmu-500 + then: + properties: + clock-names: + items: + - const: ahb + - const: bus + - const: iface + + clocks: + items: + - description: bus clock required for AHB bus access + - description: bus clock required for downstream bus access and for + the smmu ptw + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + examples: - |+ /* SMMU with stream matching or stream indexing */ -- GitLab From 3a12e8c065362f0d900a4a93ee60565253d7fde7 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:28 +0300 Subject: [PATCH 207/694] dt-bindings: arm-smmu: add special case for Google Cheza platform Cheza fw does not properly program the GPU aperture to allow the GPU to update the SMMU pagetables for context switches. The board file works around this by dropping the "qcom,adreno-smmu" compat string. 
Add this usecase to arm,smmu.yaml schema. Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-4-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 99f34a40cfe20..29a8b3ff8fa00 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -69,6 +69,10 @@ properties: - qcom,sdm845-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 + - description: Qcom Adreno GPUs on Google Cheza platform + items: + - const: qcom,sdm845-smmu-v2 + - const: qcom,smmu-v2 - description: Marvell SoCs implementing "arm,mmu-500" items: - const: marvell,ap806-smmu-500 -- GitLab From 6c84bbd103d85696af9cc0f746c01f9b2847637e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:29 +0300 Subject: [PATCH 208/694] dt-bindings: arm-smmu: Add generic qcom,smmu-500 bindings Add generic bindings for the Qualcomm variant of the ARM MMU-500. It is expected that all future platforms will use the generic qcom,smmu-500 compat string in addition to SoC-specific and the generic arm,mmu-500 ones. Older bindings are now described as deprecated. Note: I have split the sdx55 and sdx65 from the legacy bindings. They are not supported by the qcom SMMU implementation. I can suppose that they are using the generic implementation rather than the Qualcomm-speicific one. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-5-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 29a8b3ff8fa00..28f5720824cd3 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -31,7 +31,7 @@ properties: - qcom,sdm630-smmu-v2 - const: qcom,smmu-v2 - - description: Qcom SoCs implementing "arm,mmu-500" + - description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500" items: - enum: - qcom,qcm2290-smmu-500 @@ -42,8 +42,35 @@ properties: - qcom,sc8280xp-smmu-500 - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 + - qcom,sm6115-smmu-500 + - qcom,sm6350-smmu-500 + - qcom,sm6375-smmu-500 + - qcom,sm8150-smmu-500 + - qcom,sm8250-smmu-500 + - qcom,sm8350-smmu-500 + - qcom,sm8450-smmu-500 + - const: qcom,smmu-500 + - const: arm,mmu-500 + + - description: Qcom SoCs implementing "arm,mmu-500" (non-qcom implementation) + deprecated: true + items: + - enum: - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 + - const: arm,mmu-500 + + - description: Qcom SoCs implementing "arm,mmu-500" (legacy binding) + deprecated: true + items: + # Do not add additional SoC to this list. Instead use two previous lists. 
+ - enum: + - qcom,qcm2290-smmu-500 + - qcom,sc7180-smmu-500 + - qcom,sc7280-smmu-500 + - qcom,sc8180x-smmu-500 + - qcom,sc8280xp-smmu-500 + - qcom,sdm845-smmu-500 - qcom,sm6115-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 -- GitLab From 4c1d0ad153f8bca09776da6031639d3b965d849a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:30 +0300 Subject: [PATCH 209/694] iommu/arm-smmu-qcom: Move implementation data into match data In preparation to rework of the implementation and configuration details, make qcom_smmu_create() accept new qcom_smmu_match_data structure pointer. Make implementation a field in this struct. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-6-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 62 ++++++++++++++-------- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 4 ++ 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 0f4eaf217983d..a7bd49e44bcac 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -405,10 +405,18 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = { }; static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, - const struct arm_smmu_impl *impl) + const struct qcom_smmu_match_data *data) { + const struct arm_smmu_impl *impl; struct qcom_smmu *qsmmu; + if (!data) + return ERR_PTR(-EINVAL); + + impl = data->impl; + if (!impl) + return smmu; + /* Check to make sure qcom_scm has finished probing */ if (!qcom_scm_is_available()) return ERR_PTR(-EPROBE_DEFER); @@ -423,24 +431,32 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return &qsmmu->smmu; } +static const struct qcom_smmu_match_data qcom_smmu_data = { + .impl = &qcom_smmu_impl, +}; + +static const struct qcom_smmu_match_data qcom_adreno_smmu_data = { + .impl = &qcom_adreno_smmu_impl, +}; + static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { - { .compatible = "qcom,msm8998-smmu-v2" }, - { .compatible = "qcom,qcm2290-smmu-500" }, - { .compatible = "qcom,qdu1000-smmu-500" }, - { .compatible = "qcom,sc7180-smmu-500" }, - { .compatible = "qcom,sc7280-smmu-500" }, - { .compatible = "qcom,sc8180x-smmu-500" }, - { .compatible = "qcom,sc8280xp-smmu-500" }, - { .compatible = "qcom,sdm630-smmu-v2" }, - { .compatible = "qcom,sdm845-smmu-500" }, - { .compatible = "qcom,sm6115-smmu-500" }, - { .compatible = "qcom,sm6125-smmu-500" }, - { .compatible = "qcom,sm6350-smmu-500" }, - { .compatible = "qcom,sm6375-smmu-500" }, - { .compatible = "qcom,sm8150-smmu-500" }, - { .compatible = "qcom,sm8250-smmu-500" }, - { .compatible = "qcom,sm8350-smmu-500" }, - { .compatible = "qcom,sm8450-smmu-500" }, + { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm845-smmu-500", .data = 
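The shape of this change is the standard of_device_id match-data pattern: per-SoC details live in a constant structure referenced from the table's .data pointer, and the create path looks it up with of_match_node(). A minimal sketch of that pattern, assuming hypothetical names (my_impl, my_match_data, my_pick_impl), is shown below; it mirrors the pattern only, not the driver's actual structures.

        /*
         * Sketch only: carry per-compatible configuration via of_device_id .data.
         * The names below are illustrative, not the arm-smmu-qcom symbols.
         */
        #include <linux/of.h>

        struct my_impl {
                int (*reset)(void);             /* stand-in for the real callbacks */
        };

        struct my_match_data {
                const struct my_impl *impl;     /* per-SoC implementation, may be NULL */
        };

        static const struct my_impl my_soc_impl = { };

        static const struct my_match_data my_soc_data = { .impl = &my_soc_impl };

        static const struct of_device_id my_impl_of_match[] = {
                { .compatible = "vendor,soc-smmu-500", .data = &my_soc_data },
                { /* sentinel */ }
        };

        static const struct my_impl *my_pick_impl(const struct device_node *np)
        {
                const struct of_device_id *match = of_match_node(my_impl_of_match, np);

                if (!match)
                        return NULL;    /* unknown SoC: keep the generic implementation */

                return ((const struct my_match_data *)match->data)->impl;
        }

A NULL implementation pointer simply leaves the generic arm-smmu behaviour in place, which is how qcom_smmu_create() treats a missing impl.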
&qcom_smmu_data }, + { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_data }, { } }; @@ -455,12 +471,13 @@ static struct acpi_platform_list qcom_acpi_platlist[] = { struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; + const struct of_device_id *match; #ifdef CONFIG_ACPI if (np == NULL) { /* Match platform for ACPI boot */ if (acpi_match_platform_list(qcom_acpi_platlist) >= 0) - return qcom_smmu_create(smmu, &qcom_smmu_impl); + return qcom_smmu_create(smmu, &qcom_smmu_data); } #endif @@ -471,10 +488,11 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) * features if the order is changed. */ if (of_device_is_compatible(np, "qcom,adreno-smmu")) - return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl); + return qcom_smmu_create(smmu, &qcom_adreno_smmu_data); - if (of_match_node(qcom_smmu_impl_of_match, np)) - return qcom_smmu_create(smmu, &qcom_smmu_impl); + match = of_match_node(qcom_smmu_impl_of_match, np); + if (match) + return qcom_smmu_create(smmu, match->data); return smmu; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 99ec8f8629a0d..2424f10b7110b 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -14,6 +14,10 @@ struct qcom_smmu { u32 stall_enabled; }; +struct qcom_smmu_match_data { + const struct arm_smmu_impl *impl; +}; + #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu); -- GitLab From 30b912a03d91727d75ae14f277b64aca8fb915e4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:31 +0300 Subject: [PATCH 210/694] iommu/arm-smmu-qcom: Move the qcom,adreno-smmu check into qcom_smmu_create Move special handling of qcom,adreno-smmu into qcom_smmu_create() function. This allows us to further customize the Adreno SMMU implementation. Note, this also adds two entries to the qcom_smmu_impl_of_match table. They were used with the qcom,adreno-smmu compat and were handled by the removed clause. 
Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-7-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 32 ++++++++++++---------- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 1 + 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index a7bd49e44bcac..e611941277728 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -407,13 +407,18 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = { static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, const struct qcom_smmu_match_data *data) { + const struct device_node *np = smmu->dev->of_node; const struct arm_smmu_impl *impl; struct qcom_smmu *qsmmu; if (!data) return ERR_PTR(-EINVAL); - impl = data->impl; + if (np && of_device_is_compatible(np, "qcom,adreno-smmu")) + impl = data->adreno_impl; + else + impl = data->impl; + if (!impl) return smmu; @@ -431,15 +436,22 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return &qsmmu->smmu; } -static const struct qcom_smmu_match_data qcom_smmu_data = { - .impl = &qcom_smmu_impl, +/* + * It is not yet possible to use MDP SMMU with the bypass quirk on the msm8996, + * there are not enough context banks. + */ +static const struct qcom_smmu_match_data msm8996_smmu_data = { + .impl = NULL, + .adreno_impl = &qcom_adreno_smmu_impl, }; -static const struct qcom_smmu_match_data qcom_adreno_smmu_data = { - .impl = &qcom_adreno_smmu_impl, +static const struct qcom_smmu_match_data qcom_smmu_data = { + .impl = &qcom_smmu_impl, + .adreno_impl = &qcom_adreno_smmu_impl, }; static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { + { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data }, @@ -448,6 +460,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data }, @@ -481,15 +494,6 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) } #endif - /* - * Do not change this order of implementation, i.e., first adreno - * smmu impl and then apss smmu since we can have both implementing - * arm,mmu-500 in which case we will miss setting adreno smmu specific - * features if the order is changed. 
- */ - if (of_device_is_compatible(np, "qcom,adreno-smmu")) - return qcom_smmu_create(smmu, &qcom_adreno_smmu_data); - match = of_match_node(qcom_smmu_impl_of_match, np); if (match) return qcom_smmu_create(smmu, match->data); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 2424f10b7110b..424d8d342ce06 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -16,6 +16,7 @@ struct qcom_smmu { struct qcom_smmu_match_data { const struct arm_smmu_impl *impl; + const struct arm_smmu_impl *adreno_impl; }; #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG -- GitLab From 417b76adcf1d141666866eba5afdd42953f66e2f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:32 +0300 Subject: [PATCH 211/694] iommu/arm-smmu-qcom: provide separate implementation for SDM845-smmu-500 There is only one platform, which needs special care in the reset function, the SDM845. Add special handler for sdm845 and drop the qcom_smmu500_reset() function. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-8-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 +++++++++++++--------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index e611941277728..6dc7fa9187991 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -361,6 +361,8 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) { int ret; + arm_mmu500_reset(smmu); + /* * To address performance degradation in non-real time clients, * such as USB and UFS, turn off wait-for-safe on sdm845 based boards, @@ -374,23 +376,20 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) return ret; } -static int qcom_smmu500_reset(struct arm_smmu_device *smmu) -{ - const struct device_node *np = smmu->dev->of_node; - - arm_mmu500_reset(smmu); - - if (of_device_is_compatible(np, "qcom,sdm845-smmu-500")) - return qcom_sdm845_smmu500_reset(smmu); - - return 0; -} - static const struct arm_smmu_impl qcom_smmu_impl = { .init_context = qcom_smmu_init_context, .cfg_probe = qcom_smmu_cfg_probe, .def_domain_type = qcom_smmu_def_domain_type, - .reset = qcom_smmu500_reset, + .reset = arm_mmu500_reset, + .write_s2cr = qcom_smmu_write_s2cr, + .tlb_sync = qcom_smmu_tlb_sync, +}; + +static const struct arm_smmu_impl sdm845_smmu_500_impl = { + .init_context = qcom_smmu_init_context, + .cfg_probe = qcom_smmu_cfg_probe, + .def_domain_type = qcom_smmu_def_domain_type, + .reset = qcom_sdm845_smmu500_reset, .write_s2cr = qcom_smmu_write_s2cr, .tlb_sync = qcom_smmu_tlb_sync, }; @@ -398,7 +397,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = { static const struct arm_smmu_impl qcom_adreno_smmu_impl = { .init_context = qcom_adreno_smmu_init_context, .def_domain_type = qcom_smmu_def_domain_type, - .reset = qcom_smmu500_reset, + .reset = arm_mmu500_reset, .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, .tlb_sync = qcom_smmu_tlb_sync, @@ -450,6 +449,14 @@ static const struct qcom_smmu_match_data qcom_smmu_data = { .adreno_impl = &qcom_adreno_smmu_impl, }; +static const struct qcom_smmu_match_data sdm845_smmu_500_data = { + .impl = &sdm845_smmu_500_impl, + /* + * No need for adreno impl here. 
On sdm845 the Adreno SMMU is handled + * by the separate sdm845-smmu-v2 device. + */ +}; + static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, @@ -461,7 +468,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data }, - { .compatible = "qcom,sdm845-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data }, { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data }, -- GitLab From 4172dda2b30a9a0e628e81d2a3bc9a6ef0936774 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:33 +0300 Subject: [PATCH 212/694] iommu/arm-smmu-qcom: Merge table from arm-smmu-qcom-debug into match data There is little point in having a separate match table in arm-smmu-qcom-debug.c. Merge it into the main match data table in arm-smmu-qcom.c Note, this also enables debug support for qdu1000, sm6115, sm6375 and ACPI-based sc8180x systems, since these SoCs are expected to support tlb_sync debug. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-9-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 91 ------------------- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 50 ++++++---- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 16 +++- 3 files changed, 45 insertions(+), 112 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 6eed8e67a0ca8..74e9ef2fd5804 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -10,16 +10,6 @@ #include "arm-smmu.h" #include "arm-smmu-qcom.h" -enum qcom_smmu_impl_reg_offset { - QCOM_SMMU_TBU_PWR_STATUS, - QCOM_SMMU_STATS_SYNC_INV_TBU_ACK, - QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR, -}; - -struct qcom_smmu_config { - const u32 *reg_offset; -}; - void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { int ret; @@ -59,84 +49,3 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) tbu_pwr_status, sync_inv_ack, sync_inv_progress); } } - -/* Implementation Defined Register Space 0 register offsets */ -static const u32 qcom_smmu_impl0_reg_offset[] = { - [QCOM_SMMU_TBU_PWR_STATUS] = 0x2204, - [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK] = 0x25dc, - [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR] = 0x2670, -}; - -static const struct qcom_smmu_config qcm2290_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc7180_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc7280_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc8180x_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sc8280xp_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm6125_smmu_cfg = { - .reg_offset = 
qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm6350_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8150_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8250_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8350_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct qcom_smmu_config sm8450_smmu_cfg = { - .reg_offset = qcom_smmu_impl0_reg_offset, -}; - -static const struct of_device_id __maybe_unused qcom_smmu_impl_debug_match[] = { - { .compatible = "qcom,msm8998-smmu-v2" }, - { .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg }, - { .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg }, - { .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg}, - { .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg }, - { .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg }, - { .compatible = "qcom,sdm630-smmu-v2" }, - { .compatible = "qcom,sdm845-smmu-500" }, - { .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg}, - { .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg}, - { .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg }, - { .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg }, - { .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg }, - { .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg }, - { } -}; - -const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu) -{ - const struct of_device_id *match; - const struct device_node *np = smmu->dev->of_node; - - match = of_match_node(qcom_smmu_impl_debug_match, np); - if (!match) - return NULL; - - return match->data; -} diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 6dc7fa9187991..1843bcd81402e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -430,11 +430,22 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return ERR_PTR(-ENOMEM); qsmmu->smmu.impl = impl; - qsmmu->cfg = qcom_smmu_impl_data(smmu); + qsmmu->cfg = data->cfg; return &qsmmu->smmu; } +/* Implementation Defined Register Space 0 register offsets */ +static const u32 qcom_smmu_impl0_reg_offset[] = { + [QCOM_SMMU_TBU_PWR_STATUS] = 0x2204, + [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK] = 0x25dc, + [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR] = 0x2670, +}; + +static const struct qcom_smmu_config qcom_smmu_impl0_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + /* * It is not yet possible to use MDP SMMU with the bypass quirk on the msm8996, * there are not enough context banks. @@ -455,28 +466,35 @@ static const struct qcom_smmu_match_data sdm845_smmu_500_data = { * No need for adreno impl here. On sdm845 the Adreno SMMU is handled * by the separate sdm845-smmu-v2 device. */ + /* Also no debug configuration. 
*/ +}; + +static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { + .impl = &qcom_smmu_impl, + .adreno_impl = &qcom_adreno_smmu_impl, + .cfg = &qcom_smmu_impl0_cfg, }; static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, - { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, - { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_data }, - { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_data }, + { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, + { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data }, { } }; @@ -497,7 +515,7 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) if (np == NULL) { /* Match platform for ACPI boot */ if (acpi_match_platform_list(qcom_acpi_platlist) >= 0) - return qcom_smmu_create(smmu, &qcom_smmu_data); + return qcom_smmu_create(smmu, &qcom_smmu_500_impl0_data); } #endif diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 424d8d342ce06..593910567b884 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -14,20 +14,26 @@ struct qcom_smmu { u32 stall_enabled; }; +enum qcom_smmu_impl_reg_offset { + QCOM_SMMU_TBU_PWR_STATUS, + QCOM_SMMU_STATS_SYNC_INV_TBU_ACK, + QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR, +}; + +struct qcom_smmu_config { + const u32 *reg_offset; +}; + struct qcom_smmu_match_data { + const 
struct qcom_smmu_config *cfg; const struct arm_smmu_impl *impl; const struct arm_smmu_impl *adreno_impl; }; #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); -const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu); #else static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { } -static inline const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu) -{ - return NULL; -} #endif #endif /* _ARM_SMMU_QCOM_H */ -- GitLab From b4c6ee515c426f5fffc3e25772a03e44655d6e1c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:34 +0300 Subject: [PATCH 213/694] iommu/arm-smmu-qcom: Stop using mmu500 reset for v2 MMUs The arm_mmu500_reset() writes into registers specific for MMU500. For the generic ARM SMMU v2 these registers (sACR) are defined as 'implementation defined'. Downstream Qualcomm driver for SMMUv2 doesn't touch them. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-10-dmitry.baryshkov@linaro.org [will: Remove unused 'qcom_smmu_data' stucture] Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 38 +++++++++++++++------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 1843bcd81402e..07372db4184e1 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -376,7 +376,15 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) return ret; } -static const struct arm_smmu_impl qcom_smmu_impl = { +static const struct arm_smmu_impl qcom_smmu_v2_impl = { + .init_context = qcom_smmu_init_context, + .cfg_probe = qcom_smmu_cfg_probe, + .def_domain_type = qcom_smmu_def_domain_type, + .write_s2cr = qcom_smmu_write_s2cr, + .tlb_sync = qcom_smmu_tlb_sync, +}; + +static const struct arm_smmu_impl qcom_smmu_500_impl = { .init_context = qcom_smmu_init_context, .cfg_probe = qcom_smmu_cfg_probe, .def_domain_type = qcom_smmu_def_domain_type, @@ -394,7 +402,15 @@ static const struct arm_smmu_impl sdm845_smmu_500_impl = { .tlb_sync = qcom_smmu_tlb_sync, }; -static const struct arm_smmu_impl qcom_adreno_smmu_impl = { +static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = { + .init_context = qcom_adreno_smmu_init_context, + .def_domain_type = qcom_smmu_def_domain_type, + .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, + .write_sctlr = qcom_adreno_smmu_write_sctlr, + .tlb_sync = qcom_smmu_tlb_sync, +}; + +static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = { .init_context = qcom_adreno_smmu_init_context, .def_domain_type = qcom_smmu_def_domain_type, .reset = arm_mmu500_reset, @@ -452,12 +468,12 @@ static const struct qcom_smmu_config qcom_smmu_impl0_cfg = { */ static const struct qcom_smmu_match_data msm8996_smmu_data = { .impl = NULL, - .adreno_impl = &qcom_adreno_smmu_impl, + .adreno_impl = &qcom_adreno_smmu_v2_impl, }; -static const struct qcom_smmu_match_data qcom_smmu_data = { - .impl = &qcom_smmu_impl, - .adreno_impl = &qcom_adreno_smmu_impl, +static const struct qcom_smmu_match_data qcom_smmu_v2_data = { + .impl = &qcom_smmu_v2_impl, + .adreno_impl = &qcom_adreno_smmu_v2_impl, }; static const struct qcom_smmu_match_data sdm845_smmu_500_data = { @@ -470,22 +486,22 @@ static const struct qcom_smmu_match_data sdm845_smmu_500_data = { }; static const struct qcom_smmu_match_data 
qcom_smmu_500_impl0_data = { - .impl = &qcom_smmu_impl, - .adreno_impl = &qcom_adreno_smmu_impl, + .impl = &qcom_smmu_500_impl, + .adreno_impl = &qcom_adreno_smmu_500_impl, .cfg = &qcom_smmu_impl0_cfg, }; static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, - { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, - { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data }, - { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data }, + { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, -- GitLab From 80b71080720e34eaf06642c372d4c11d046baf27 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Nov 2022 20:06:35 +0300 Subject: [PATCH 214/694] iommu/arm-smmu-qcom: Add generic qcom,smmu-500 match entry Add generic qcom,smmu-500 compatibility string. Newer platforms should use this generic entry rather than declaring per-SoC entries. Reviewed-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114170635.1406534-11-dmitry.baryshkov@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 07372db4184e1..c94daf88c5051 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -491,6 +491,10 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { .cfg = &qcom_smmu_impl0_cfg, }; +/* + * Do not add any more qcom,SOC-smmu-500 entries to this list, unless they need + * special handling and can not be covered by the qcom,smmu-500 entry. 
+ */ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data }, { .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_v2_data }, @@ -511,6 +515,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,smmu-500", .data = &qcom_smmu_500_impl0_data }, { } }; -- GitLab From 93e3f45a26310e3f3f8558be40df411e23ab742c Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:39 +0530 Subject: [PATCH 215/694] powerpc: Fix __WARN_FLAGS() for use with Objtool Commit 1e688dd2a3d675 ("powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto") updated __WARN_FLAGS() to use asm goto, and added a call to 'unreachable()' after the asm goto for optimal code generation. With CONFIG_OBJTOOL enabled, 'annotate_unreachable()' statement in 'unreachable()' tries to note down the location of the subsequent instruction in a separate elf section to aid code flow analysis. However, on powerpc, this results in gcc emitting a call to a symbol of size 0. This results in objtool complaining of "unannotated intra-function call" since the target symbol is not a valid function call destination. Objtool wants this annotation for code flow analysis, which we are not yet enabling on powerpc. As such, expand the call to 'unreachable()' in __WARN_FLAGS() without annotate_unreachable(): barrier_before_unreachable(); __builtin_unreachable(); This still results in optimal code generation for __WARN_FLAGS(), while getting rid of the objtool warning. We still need barrier_before_unreachable() to work around gcc bugs 82365 and 106751: - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106751 Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-2-sv@linux.ibm.com --- arch/powerpc/include/asm/bug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 61a4736355c24..ef42adb44aa3f 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -99,7 +99,8 @@ __label__ __label_warn_on; \ \ WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \ - unreachable(); \ + barrier_before_unreachable(); \ + __builtin_unreachable(); \ \ __label_warn_on: \ break; \ -- GitLab From 01f2cf0b990e58ae89142f57c7e02d33621311d2 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:40 +0530 Subject: [PATCH 216/694] powerpc: Override __ALIGN and __ALIGN_STR macros In a subsequent patch, we would want to annotate powerpc assembly functions with SYM_FUNC_START_LOCAL macro. This macro depends on __ALIGN macro. The default expansion of __ALIGN macro is: #define __ALIGN .align 4,0x90 So, override __ALIGN and __ALIGN_STR macros to use the same alignment as that of the existing _GLOBAL macro. Also, do not pad with 0x90, because repeated 0x90s are not a nop or trap on powerpc. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-3-sv@linux.ibm.com --- arch/powerpc/include/asm/linkage.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index b71b9582e7546..b88d1d2cf3044 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -4,6 +4,9 @@ #include +#define __ALIGN .align 2 +#define __ALIGN_STR ".align 2" + #ifdef CONFIG_PPC64_ELF_ABI_V1 #define cond_syscall(x) \ asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \ -- GitLab From 29a011fc79e625b2b02f25262657f7c4c59ae9f7 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:41 +0530 Subject: [PATCH 217/694] powerpc: Fix objtool unannotated intra-function call warnings Objtool throws unannotated intra-function call warnings in the following assembly files: arch/powerpc/kernel/vector.o: warning: objtool: .text+0x53c: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x60: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x124: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x5d4: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x5dc: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xcb8: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xd0c: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x1030: unannotated intra-function call arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x358: unannotated intra-function call arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x728: unannotated intra-function call arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x4d94: unannotated intra-function call arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x4ec4: unannotated intra-function call arch/powerpc/kvm/book3s_hv_interrupts.o: warning: objtool: .text+0x6c: unannotated intra-function call arch/powerpc/kernel/misc_64.o: warning: objtool: .text+0x64: unannotated intra-function call Objtool does not add STT_NOTYPE symbols with size 0 to the rbtree, which is why find_call_destination() function is not able to find the destination symbol for 'bl' instruction. For such symbols, objtool is throwing unannotated intra-function call warnings in assembly files. Fix these warnings by annotating those symbols with SYM_FUNC_START_LOCAL and SYM_FUNC_END macros, inorder to set symbol type to STT_FUNC and symbol size accordingly. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-4-sv@linux.ibm.com --- arch/powerpc/kernel/exceptions-64s.S | 4 +++- arch/powerpc/kernel/head_64.S | 7 +++++-- arch/powerpc/kernel/misc_64.S | 4 +++- arch/powerpc/kernel/vector.S | 4 +++- arch/powerpc/kvm/book3s_hv_interrupts.S | 4 +++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 22 +++++++++++++++------- 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5381a43e50fef..77201ad9f329d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -13,6 +13,7 @@ * */ +#include #include #include #include @@ -3112,7 +3113,7 @@ _GLOBAL(enable_machine_check) blr /* MSR[RI] should be clear because this uses SRR[01] */ -disable_machine_check: +SYM_FUNC_START_LOCAL(disable_machine_check) mflr r0 bcl 20,31,$+4 0: mflr r3 @@ -3125,3 +3126,4 @@ disable_machine_check: RFI_TO_KERNEL 1: mtlr r0 blr +SYM_FUNC_END(disable_machine_check) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index dedcc6fe2263a..874efd25cc454 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -18,6 +18,7 @@ * variants. */ +#include #include #include #include @@ -462,7 +463,7 @@ generic_secondary_common_init: * Assumes we're mapped EA == RA if the MMU is on. */ #ifdef CONFIG_PPC_BOOK3S -__mmu_off: +SYM_FUNC_START_LOCAL(__mmu_off) mfmsr r3 andi. r0,r3,MSR_IR|MSR_DR beqlr @@ -473,6 +474,7 @@ __mmu_off: sync rfid b . /* prevent speculative execution */ +SYM_FUNC_END(__mmu_off) #endif @@ -869,7 +871,7 @@ _GLOBAL(start_secondary_resume) /* * This subroutine clobbers r11 and r12 */ -enable_64b_mode: +SYM_FUNC_START_LOCAL(enable_64b_mode) mfmsr r11 /* grab the current MSR */ #ifdef CONFIG_PPC_BOOK3E_64 oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ @@ -881,6 +883,7 @@ enable_64b_mode: isync #endif blr +SYM_FUNC_END(enable_64b_mode) /* * This puts the TOC pointer into r2, offset by 0x8000 (as expected diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 36184cada00b1..c61a7ba446a8a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -9,6 +9,7 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ +#include #include #include #include @@ -353,7 +354,7 @@ _GLOBAL(kexec_smp_wait) * * don't overwrite r3 here, it is live for kexec_wait above. */ -real_mode: /* assume normal blr return */ +SYM_FUNC_START_LOCAL(real_mode) /* assume normal blr return */ #ifdef CONFIG_PPC_BOOK3E_64 /* Create an identity mapping. */ b kexec_create_tlb @@ -370,6 +371,7 @@ real_mode: /* assume normal blr return */ mtspr SPRN_SRR0,r11 rfid #endif +SYM_FUNC_END(real_mode) /* * kexec_sequence(newstack, start, image, control, clear_all(), diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 5cf64740edb82..ffe5d90abe179 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include @@ -185,7 +186,7 @@ fphalf: * Internal routine to enable floating point and set FPSCR to 0. * Don't call it from C; it doesn't use the normal calling convention. 
*/ -fpenable: +SYM_FUNC_START_LOCAL(fpenable) #ifdef CONFIG_PPC32 stwu r1,-64(r1) #else @@ -202,6 +203,7 @@ fpenable: mffs fr31 MTFSF_L(fr1) blr +SYM_FUNC_END(fpenable) fpdisable: mtlr r12 diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 59d89e4b154a6..c0deeea7eef30 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -9,6 +9,7 @@ * Authors: Alexander Graf */ +#include #include #include #include @@ -107,7 +108,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* * void kvmhv_save_host_pmu(void) */ -kvmhv_save_host_pmu: +SYM_FUNC_START_LOCAL(kvmhv_save_host_pmu) BEGIN_FTR_SECTION /* Work around P8 PMAE bug */ li r3, -1 @@ -154,3 +155,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) stw r8, HSTATE_PMC5(r13) stw r9, HSTATE_PMC6(r13) 31: blr +SYM_FUNC_END(kvmhv_save_host_pmu) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 37f50861dd98f..a69d36cbf43be 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -10,6 +10,7 @@ * Authors: Alexander Graf */ +#include #include #include #include @@ -2358,7 +2359,7 @@ hmi_realmode: * This routine calls kvmppc_read_intr, a C function, if an external * interrupt is pending. */ -kvmppc_check_wake_reason: +SYM_FUNC_START_LOCAL(kvmppc_check_wake_reason) mfspr r6, SPRN_SRR1 BEGIN_FTR_SECTION rlwinm r6, r6, 45-31, 0xf /* extract wake reason field (P8) */ @@ -2427,6 +2428,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) addi r1, r1, PPC_MIN_STKFRM mtlr r0 blr +SYM_FUNC_END(kvmppc_check_wake_reason) /* * Save away FP, VMX and VSX registers. @@ -2434,7 +2436,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * N.B. r30 and r31 are volatile across this function, * thus it is not callable from C. */ -kvmppc_save_fp: +SYM_FUNC_START_LOCAL(kvmppc_save_fp) mflr r30 mr r31,r3 mfmsr r5 @@ -2462,6 +2464,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) stw r6,VCPU_VRSAVE(r31) mtlr r30 blr +SYM_FUNC_END(kvmppc_save_fp) /* * Load up FP, VMX and VSX registers @@ -2469,7 +2472,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * N.B. r30 and r31 are volatile across this function, * thus it is not callable from C. */ -kvmppc_load_fp: +SYM_FUNC_START_LOCAL(kvmppc_load_fp) mflr r30 mr r31,r4 mfmsr r9 @@ -2498,6 +2501,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtlr r30 mr r4,r31 blr +SYM_FUNC_END(kvmppc_load_fp) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* @@ -2746,7 +2750,7 @@ kvmppc_bad_host_intr: * r9 has a vcpu pointer (in) * r0 is used as a scratch register */ -kvmppc_msr_interrupt: +SYM_FUNC_START_LOCAL(kvmppc_msr_interrupt) rldicl r0, r11, 64 - MSR_TS_S_LG, 62 cmpwi r0, 2 /* Check if we are in transactional state.. */ ld r11, VCPU_INTR_MSR(r9) @@ -2755,13 +2759,14 @@ kvmppc_msr_interrupt: li r0, 1 1: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG blr +SYM_FUNC_END(kvmppc_msr_interrupt) /* * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu) * * Load up guest PMU state. R3 points to the vcpu struct. */ -kvmhv_load_guest_pmu: +SYM_FUNC_START_LOCAL(kvmhv_load_guest_pmu) mr r4, r3 mflr r0 li r3, 1 @@ -2811,13 +2816,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) isync mtlr r0 blr +SYM_FUNC_END(kvmhv_load_guest_pmu) /* * void kvmhv_load_host_pmu(void) * * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu. */ -kvmhv_load_host_pmu: +SYM_FUNC_START_LOCAL(kvmhv_load_host_pmu) mflr r0 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? 
*/ cmpwi r4, 0 @@ -2859,6 +2865,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) isync mtlr r0 23: blr +SYM_FUNC_END(kvmhv_load_host_pmu) /* * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use) @@ -2866,7 +2873,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * Save guest PMU state into the vcpu struct. * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA) */ -kvmhv_save_guest_pmu: +SYM_FUNC_START_LOCAL(kvmhv_save_guest_pmu) mr r9, r3 mr r8, r4 BEGIN_FTR_SECTION @@ -2942,6 +2949,7 @@ BEGIN_FTR_SECTION mtspr SPRN_MMCRS, r4 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: blr +SYM_FUNC_END(kvmhv_save_guest_pmu) /* * This works around a hardware bug on POWER8E processors, where -- GitLab From 8d0c21b50655bfe136a76cf384495ba1f9c87224 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:42 +0530 Subject: [PATCH 218/694] powerpc: Curb objtool unannotated intra-function call warnings objtool throws the following unannotated intra-function call warnings: arch/powerpc/kernel/entry_64.o: warning: objtool: .text+0x4: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xe64: unannotated intra-function call arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xee4: unannotated intra-function call Fix these warnings by annotating intra-function calls, using ANNOTATE_INTRA_FUNCTION_CALL macro, to indicate that the branch targets are valid. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-5-sv@linux.ibm.com --- arch/powerpc/kernel/entry_64.S | 2 ++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3e2e37e6ecabe..1bf1121e17f1c 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -14,6 +14,7 @@ * code, and exception/interrupt return code for PowerPC. */ +#include #include #include #include @@ -73,6 +74,7 @@ flush_branch_caches: // Flush the link stack .rept 64 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr b 1f diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index a69d36cbf43be..96b65b5301569 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -1523,12 +1524,14 @@ kvm_flush_link_stack: /* Flush the link stack. On Power8 it's up to 32 entries in size. */ .rept 32 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr /* And on Power9 it's up to 64. */ BEGIN_FTR_SECTION .rept 32 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) -- GitLab From f87f6e5b4539639460ab105e597e5190c9b2500f Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Fri, 4 Nov 2022 06:21:21 +0800 Subject: [PATCH 219/694] iommu/arm-smmu: Warn once when the perfetcher errata patch fails to apply Default reset value of secure banked register SMMU_sACR.cache_lock is 1. If it is not been set to 0 by secure software(eg: atf), the non-secure linux cannot clear ARM_MMU500_ACTLR_CPRE bit. In this situation, the prefetcher errata is not applied successfully, warn once. 
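The fix that follows is an instance of the write/read-back/warn-once pattern. A hedged sketch, with hypothetical foo_* names rather than the arm-smmu code itself:

/*
 * Sketch only: clear a bit, read it back, and warn once if a secure-side
 * lock made the hardware silently ignore the non-secure write.
 */
#include <linux/device.h>
#include <linux/io.h>

struct foo_ctx {
        struct device *dev;
        void __iomem *base;
};

static void foo_disable_prefetch(struct foo_ctx *foo, unsigned long offset, u32 mask)
{
        u32 reg = readl_relaxed(foo->base + offset);

        writel_relaxed(reg & ~mask, foo->base + offset);

        /* A locked register (e.g. CACHE_LOCK set by secure firmware) drops the write. */
        reg = readl_relaxed(foo->base + offset);
        if (reg & mask)
                dev_warn_once(foo->dev,
                              "prefetch disable bit did not clear; check the secure-side lock\n");
}

dev_warn_once() keeps the diagnostic to a single line even though the check runs per context bank, which matches the intent described above.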
Signed-off-by: Chen Lin Link: https://lore.kernel.org/r/20221103222121.3051-1-chen45464546@163.com [will: Tweaked wording of diagnostic] Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 658f3cc832781..9dc772f2cbb27 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -136,6 +136,9 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); reg &= ~ARM_MMU500_ACTLR_CPRE; arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg); + reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); + if (reg & ARM_MMU500_ACTLR_CPRE) + dev_warn_once(smmu->dev, "Failed to disable prefetcher [errata #841119 and #826419], check ACR.CACHE_LOCK\n"); } return 0; -- GitLab From 65d9cc3fd0e7f98964622557c0c94240e68441e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:06:58 +0200 Subject: [PATCH 220/694] rtc: abx80x: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). The device_id array has to move up for that to work. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-2-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-abx80x.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index 9b0138d07232d..e7f325ced9402 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -673,13 +673,28 @@ static int abx80x_setup_watchdog(struct abx80x_priv *priv) } #endif -static int abx80x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static const struct i2c_device_id abx80x_id[] = { + { "abx80x", ABX80X }, + { "ab0801", AB0801 }, + { "ab0803", AB0803 }, + { "ab0804", AB0804 }, + { "ab0805", AB0805 }, + { "ab1801", AB1801 }, + { "ab1803", AB1803 }, + { "ab1804", AB1804 }, + { "ab1805", AB1805 }, + { "rv1805", RV1805 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, abx80x_id); + +static int abx80x_probe(struct i2c_client *client) { struct device_node *np = client->dev.of_node; struct abx80x_priv *priv; int i, data, err, trickle_cfg = -EINVAL; char buf[7]; + const struct i2c_device_id *id = i2c_match_id(abx80x_id, client); unsigned int part = id->driver_data; unsigned int partnumber; unsigned int majrev, minrev; @@ -847,21 +862,6 @@ static int abx80x_probe(struct i2c_client *client, return devm_rtc_register_device(priv->rtc); } -static const struct i2c_device_id abx80x_id[] = { - { "abx80x", ABX80X }, - { "ab0801", AB0801 }, - { "ab0803", AB0803 }, - { "ab0804", AB0804 }, - { "ab0805", AB0805 }, - { "ab1801", AB1801 }, - { "ab1803", AB1803 }, - { "ab1804", AB1804 }, - { "ab1805", AB1805 }, - { "rv1805", RV1805 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, abx80x_id); - #ifdef CONFIG_OF static const struct of_device_id abx80x_of_match[] = { { @@ -914,7 +914,7 @@ static struct i2c_driver abx80x_driver = { .name = "rtc-abx80x", .of_match_table = of_match_ptr(abx80x_of_match), }, - .probe = abx80x_probe, + .probe_new = abx80x_probe, .id_table = abx80x_id, }; -- GitLab From 2611e6d743be8f1ad2216622f39e12c63352a457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: 
Fri, 21 Oct 2022 15:07:00 +0200 Subject: [PATCH 221/694] rtc: isl1208: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-4-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-isl1208.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index f448a525333e1..73cc6aaf9b8b7 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -797,7 +797,7 @@ static int isl1208_setup_irq(struct i2c_client *client, int irq) } static int -isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id) +isl1208_probe(struct i2c_client *client) { int rc = 0; struct isl1208_state *isl1208; @@ -821,6 +821,8 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id) if (!isl1208->config) return -ENODEV; } else { + const struct i2c_device_id *id = i2c_match_id(isl1208_id, client); + if (id->driver_data >= ISL_LAST_ID) return -ENODEV; isl1208->config = &isl1208_configs[id->driver_data]; @@ -906,7 +908,7 @@ static struct i2c_driver isl1208_driver = { .name = "rtc-isl1208", .of_match_table = of_match_ptr(isl1208_of_match), }, - .probe = isl1208_probe, + .probe_new = isl1208_probe, .id_table = isl1208_id, }; -- GitLab From 67db6f0515d15e6becbd59bc5da4ca2d03d51789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:01 +0200 Subject: [PATCH 222/694] rtc: m41t80: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). 
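The rtc conversions in this series all follow the same shape. A minimal sketch with a hypothetical "foo" RTC driver (not code from any of these patches): the id table is defined above the probe function, and .probe_new() recovers the matched entry itself with i2c_match_id() whenever ->driver_data is needed.

#include <linux/i2c.h>
#include <linux/module.h>

static const struct i2c_device_id foo_id[] = {
        { "foo-a", 0 },
        { "foo-b", 1 },
        { }
};
MODULE_DEVICE_TABLE(i2c, foo_id);

static int foo_probe(struct i2c_client *client)
{
        /* .probe_new() gets no i2c_device_id argument: look it up ourselves. */
        const struct i2c_device_id *id = i2c_match_id(foo_id, client);

        if (!id)
                return -ENODEV;

        dev_info(&client->dev, "probing variant %lu\n", id->driver_data);
        return 0;
}

static struct i2c_driver foo_driver = {
        .driver = {
                .name = "rtc-foo",
        },
        .probe_new = foo_probe,
        .id_table = foo_id,
};
module_i2c_driver(foo_driver);
MODULE_LICENSE("GPL");

Drivers whose probe never used the id argument at all (nct3018y later in this series, for example) need no lookup and convert by simply dropping the parameter.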
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-5-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-m41t80.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index e0b4d3794320b..d3144ffdebb58 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -876,8 +876,7 @@ static struct notifier_block wdt_notifier = { ***************************************************************************** */ -static int m41t80_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int m41t80_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; int rc = 0; @@ -897,11 +896,13 @@ static int m41t80_probe(struct i2c_client *client, return -ENOMEM; m41t80_data->client = client; - if (client->dev.of_node) + if (client->dev.of_node) { m41t80_data->features = (unsigned long) of_device_get_match_data(&client->dev); - else + } else { + const struct i2c_device_id *id = i2c_match_id(m41t80_id, client); m41t80_data->features = id->driver_data; + } i2c_set_clientdata(client, m41t80_data); m41t80_data->rtc = devm_rtc_allocate_device(&client->dev); @@ -1007,7 +1008,7 @@ static struct i2c_driver m41t80_driver = { .of_match_table = of_match_ptr(m41t80_of_match), .pm = &m41t80_pm, }, - .probe = m41t80_probe, + .probe_new = m41t80_probe, .remove = m41t80_remove, .id_table = m41t80_id, }; -- GitLab From c050dedb875c2eee920010c1dba458ce5bf8a171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:02 +0200 Subject: [PATCH 223/694] rtc: nct3018y: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-6-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-nct3018y.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c index d43acd3920ed3..0a3b14c95d902 100644 --- a/drivers/rtc/rtc-nct3018y.c +++ b/drivers/rtc/rtc-nct3018y.c @@ -452,8 +452,7 @@ static const struct rtc_class_ops nct3018y_rtc_ops = { .ioctl = nct3018y_ioctl, }; -static int nct3018y_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int nct3018y_probe(struct i2c_client *client) { struct nct3018y *nct3018y; int err, flags; @@ -541,7 +540,7 @@ static struct i2c_driver nct3018y_driver = { .name = "rtc-nct3018y", .of_match_table = of_match_ptr(nct3018y_of_match), }, - .probe = nct3018y_probe, + .probe_new = nct3018y_probe, .id_table = nct3018y_id, }; -- GitLab From 5418e595f30bf4fde83ebb0121417c0c95cff98e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:03 +0200 Subject: [PATCH 224/694] rtc: pcf2127: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). The device_id array has to move up for that to work. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-7-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf2127.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 63b275b014bd6..87f4fc9df68b4 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -885,9 +885,17 @@ static const struct regmap_bus pcf2127_i2c_regmap = { static struct i2c_driver pcf2127_i2c_driver; -static int pcf2127_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static const struct i2c_device_id pcf2127_i2c_id[] = { + { "pcf2127", 1 }, + { "pcf2129", 0 }, + { "pca2129", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id); + +static int pcf2127_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_match_id(pcf2127_i2c_id, client); struct regmap *regmap; static const struct regmap_config config = { .reg_bits = 8, @@ -910,20 +918,12 @@ static int pcf2127_i2c_probe(struct i2c_client *client, pcf2127_i2c_driver.driver.name, id->driver_data); } -static const struct i2c_device_id pcf2127_i2c_id[] = { - { "pcf2127", 1 }, - { "pcf2129", 0 }, - { "pca2129", 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id); - static struct i2c_driver pcf2127_i2c_driver = { .driver = { .name = "rtc-pcf2127-i2c", .of_match_table = of_match_ptr(pcf2127_of_match), }, - .probe = pcf2127_i2c_probe, + .probe_new = pcf2127_i2c_probe, .id_table = pcf2127_i2c_id, }; -- GitLab From 8d94da6678702fac20ed3e1421b8b1ad03368a8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:04 +0200 Subject: [PATCH 225/694] rtc: rs5c372: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-8-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rs5c372.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index 9562c477e1c96..5047afefcceb7 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -791,8 +791,7 @@ static int rs5c_oscillator_setup(struct rs5c372 *rs5c372) return 0; } -static int rs5c372_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rs5c372_probe(struct i2c_client *client) { int err = 0; int smbus_mode = 0; @@ -826,11 +825,13 @@ static int rs5c372_probe(struct i2c_client *client, rs5c372->client = client; i2c_set_clientdata(client, rs5c372); - if (client->dev.of_node) + if (client->dev.of_node) { rs5c372->type = (enum rtc_type) of_device_get_match_data(&client->dev); - else + } else { + const struct i2c_device_id *id = i2c_match_id(rs5c372_id, client); rs5c372->type = id->driver_data; + } /* we read registers 0x0f then 0x00-0x0f; skip the first one */ rs5c372->regs = &rs5c372->buf[1]; @@ -920,7 +921,7 @@ static struct i2c_driver rs5c372_driver = { .name = "rtc-rs5c372", .of_match_table = of_match_ptr(rs5c372_of_match), }, - .probe = rs5c372_probe, + .probe_new = rs5c372_probe, .remove = rs5c372_remove, .id_table = rs5c372_id, }; -- GitLab From 84c2fb386f71d364cf00ec9f4596a2b279e3ca54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:05 +0200 Subject: [PATCH 226/694] rtc: rv8803: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). The device_id array has to move up for that to work. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-9-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv8803.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index 3527a0521e9b2..b581b6d5ad731 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -576,8 +576,16 @@ static int rv8803_regs_configure(struct rv8803_data *rv8803) return 0; } -static int rv8803_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static const struct i2c_device_id rv8803_id[] = { + { "rv8803", rv_8803 }, + { "rv8804", rx_8804 }, + { "rx8803", rx_8803 }, + { "rx8900", rx_8900 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, rv8803_id); + +static int rv8803_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; struct rv8803_data *rv8803; @@ -605,11 +613,14 @@ static int rv8803_probe(struct i2c_client *client, mutex_init(&rv8803->flags_lock); rv8803->client = client; - if (client->dev.of_node) + if (client->dev.of_node) { rv8803->type = (enum rv8803_type) of_device_get_match_data(&client->dev); - else + } else { + const struct i2c_device_id *id = i2c_match_id(rv8803_id, client); + rv8803->type = id->driver_data; + } i2c_set_clientdata(client, rv8803); flags = rv8803_read_reg(client, RV8803_FLAG); @@ -666,15 +677,6 @@ static int rv8803_probe(struct i2c_client *client, return 0; } -static const struct i2c_device_id rv8803_id[] = { - { "rv8803", rv_8803 }, - { "rv8804", rx_8804 }, - { "rx8803", rx_8803 }, - { "rx8900", rx_8900 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, rv8803_id); - static const __maybe_unused struct of_device_id rv8803_of_match[] = { { .compatible = "microcrystal,rv8803", @@ -701,7 +703,7 @@ static struct i2c_driver rv8803_driver = { .name = "rtc-rv8803", .of_match_table = of_match_ptr(rv8803_of_match), }, - .probe = rv8803_probe, + .probe_new = rv8803_probe, .id_table = rv8803_id, }; module_i2c_driver(rv8803_driver); -- GitLab From 8ffb7733e162d26393523bd95c5bfffa6e09baf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Oct 2022 15:07:06 +0200 Subject: [PATCH 227/694] rtc: rx8025: Convert to .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in .probe(). 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221021130706.178687-10-u.kleine-koenig@pengutronix.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rx8025.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index dde86f3e2a4bd..77d3cb08b5ec2 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -519,9 +519,9 @@ static const struct attribute_group rx8025_attr_group = { .attrs = rx8025_attrs, }; -static int rx8025_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int rx8025_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_match_id(rx8025_id, client); struct i2c_adapter *adapter = client->adapter; struct rx8025_data *rx8025; int err = 0; @@ -580,7 +580,7 @@ static struct i2c_driver rx8025_driver = { .driver = { .name = "rtc-rx8025", }, - .probe = rx8025_probe, + .probe_new = rx8025_probe, .id_table = rx8025_id, }; -- GitLab From 9800f24f7bd5b99fb4fc4ce981427102e2e15a1c Mon Sep 17 00:00:00 2001 From: Yushan Zhou Date: Mon, 7 Nov 2022 17:25:44 +0800 Subject: [PATCH 228/694] rtc: rzn1: Check return value in rzn1_rtc_probe The rzn1_rtc_probe() function utilizes devm_pm_runtime_enable() but wasn't checking the return value. Fix it by adding missing check. Fixes: deeb4b5393e1 ("rtc: rzn1: Add new RTC driver") Signed-off-by: Yushan Zhou Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20221107092544.3721053-1-zys.zljxml@gmail.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rzn1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rzn1.c b/drivers/rtc/rtc-rzn1.c index ac788799c8e3e..0d36bc50197c1 100644 --- a/drivers/rtc/rtc-rzn1.c +++ b/drivers/rtc/rtc-rzn1.c @@ -355,7 +355,9 @@ static int rzn1_rtc_probe(struct platform_device *pdev) set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtcdev->features); clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtcdev->features); - devm_pm_runtime_enable(&pdev->dev); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret < 0) + return ret; ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) return ret; -- GitLab From f2fa14b0b586bad3ff2c0d908b8a44b22eaebe15 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 8 Nov 2022 13:02:53 +0100 Subject: [PATCH 229/694] dt-bindings: rtc: qcom-pm8xxx: document qcom,pm8921-rtc as fallback of qcom,pm8018-rtc The PM8018 RTC is used as compatible with PM8921 RTC on the MDM9615, document this situation. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220928-mdm9615-dt-schema-fixes-v5-1-bbb120c6766a@linaro.org Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml index 23ab5bb4f3956..0a7aa29563c1c 100644 --- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml @@ -11,12 +11,16 @@ maintainers: properties: compatible: - enum: - - qcom,pm8058-rtc - - qcom,pm8921-rtc - - qcom,pm8941-rtc - - qcom,pm8018-rtc - - qcom,pmk8350-rtc + oneOf: + - enum: + - qcom,pm8058-rtc + - qcom,pm8921-rtc + - qcom,pm8941-rtc + - qcom,pmk8350-rtc + - items: + - enum: + - qcom,pm8018-rtc + - const: qcom,pm8921-rtc reg: minItems: 1 -- GitLab From 741a2830734bc22238e5c248630311e401481ecc Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 8 Nov 2022 13:02:54 +0100 Subject: [PATCH 230/694] rtc: pm8xxx: drop unused pm8018 compatible The PM8018 compatible is always used with PM8921 fallback, so PM8018 compatible can be safely removed from device ID table Reviewed-by: Krzysztof Kozlowski Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20220928-mdm9615-dt-schema-fixes-v5-2-bbb120c6766a@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pm8xxx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index dc6d1476baa59..716e5d9ad74d1 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -461,7 +461,6 @@ static const struct pm8xxx_rtc_regs pmk8350_regs = { */ static const struct of_device_id pm8xxx_id_table[] = { { .compatible = "qcom,pm8921-rtc", .data = &pm8921_regs }, - { .compatible = "qcom,pm8018-rtc", .data = &pm8921_regs }, { .compatible = "qcom,pm8058-rtc", .data = &pm8058_regs }, { .compatible = "qcom,pm8941-rtc", .data = &pm8941_regs }, { .compatible = "qcom,pmk8350-rtc", .data = &pmk8350_regs }, -- GitLab From f27efee663701f0e93351cf052677214fed40a42 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 28 Oct 2022 17:54:00 -0700 Subject: [PATCH 231/694] rtc: cros-ec: Limit RTC alarm range if needed RTC chips on some older Chromebooks can only handle alarms less than 24 hours in the future. Attempts to set an alarm beyond that range fails. The most severe impact of this limitation is that suspend requests fail if alarmtimer_suspend() tries to set an alarm for more than 24 hours in the future. Try to set the real-time alarm to just below 24 hours if setting it to a larger value fails to work around the problem. While not perfect, it is better than just failing the call. A similar workaround is already implemented in the rtc-tps6586x driver. Drop error messages in cros_ec_rtc_get() and cros_ec_rtc_set() since the calling code also logs an error and to avoid spurious error messages if setting the alarm ultimately succeeds. 
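For illustration only (the numbers below are not from the patch): with this fallback, an alarm requested three days ahead on an affected EC no longer fails the suspend, it is re-armed just under the limit instead:

    alarm_offset = 3 * SECS_PER_DAY;   /* 259200 s -> EC rejects with -EINVAL */
    alarm_offset = SECS_PER_DAY - 1;   /* 86399 s  -> retry is accepted       */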
Cc: Brian Norris Signed-off-by: Guenter Roeck Commit: Guenter Roeck Reviewed-by: Tzung-Bi Shih Reviewed-by: Brian Norris Tested-by: Brian Norris Link: https://lore.kernel.org/r/20221029005400.2712577-1-linux@roeck-us.net Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cros-ec.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index 887f5193e253d..a3ec066d80667 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -14,6 +14,8 @@ #define DRV_NAME "cros-ec-rtc" +#define SECS_PER_DAY (24 * 60 * 60) + /** * struct cros_ec_rtc - Driver data for EC RTC * @@ -43,13 +45,8 @@ static int cros_ec_rtc_get(struct cros_ec_device *cros_ec, u32 command, msg.msg.insize = sizeof(msg.data); ret = cros_ec_cmd_xfer_status(cros_ec, &msg.msg); - if (ret < 0) { - dev_err(cros_ec->dev, - "error getting %s from EC: %d\n", - command == EC_CMD_RTC_GET_VALUE ? "time" : "alarm", - ret); + if (ret < 0) return ret; - } *response = msg.data.time; @@ -59,7 +56,7 @@ static int cros_ec_rtc_get(struct cros_ec_device *cros_ec, u32 command, static int cros_ec_rtc_set(struct cros_ec_device *cros_ec, u32 command, u32 param) { - int ret = 0; + int ret; struct { struct cros_ec_command msg; struct ec_response_rtc data; @@ -71,13 +68,8 @@ static int cros_ec_rtc_set(struct cros_ec_device *cros_ec, u32 command, msg.data.time = param; ret = cros_ec_cmd_xfer_status(cros_ec, &msg.msg); - if (ret < 0) { - dev_err(cros_ec->dev, "error setting %s on EC: %d\n", - command == EC_CMD_RTC_SET_VALUE ? "time" : "alarm", - ret); + if (ret < 0) return ret; - } - return 0; } @@ -190,8 +182,21 @@ static int cros_ec_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset); if (ret < 0) { - dev_err(dev, "error setting alarm: %d\n", ret); - return ret; + if (ret == -EINVAL && alarm_offset >= SECS_PER_DAY) { + /* + * RTC chips on some older Chromebooks can only handle + * alarms up to 24h in the future. Try to set an alarm + * below that limit to avoid suspend failures. + */ + ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, + SECS_PER_DAY - 1); + } + + if (ret < 0) { + dev_err(dev, "error setting alarm in %u seconds: %d\n", + alarm_offset, ret); + return ret; + } } return 0; -- GitLab From 5dc8356830428656c3a00dd702fb9102fe43550f Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 13 Sep 2022 22:49:05 +0800 Subject: [PATCH 232/694] rtc: ds1302: remove unnecessary spi_set_drvdata() Remove unnecessary spi_set_drvdata() in ds1302_remove(), the driver_data will be set to NULL in device_unbind_cleanup() after calling ->remove(). After this, ds1302_remove() is an empty function, so remove it too. 
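A rough sketch of why the callback became redundant (not the exact driver-core code): once ->remove() returns, the core clears the driver data itself, along the lines of

    static void device_unbind_cleanup(struct device *dev)
    {
            /* ... other teardown ... */
            dev_set_drvdata(dev, NULL);     /* what ds1302_remove() did by hand */
    }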
Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220913144905.2004924-1-yangyingliang@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1302.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index 6d66ab5a8b176..ecc7d03079320 100644 --- a/drivers/rtc/rtc-ds1302.c +++ b/drivers/rtc/rtc-ds1302.c @@ -185,11 +185,6 @@ static int ds1302_probe(struct spi_device *spi) return 0; } -static void ds1302_remove(struct spi_device *spi) -{ - spi_set_drvdata(spi, NULL); -} - #ifdef CONFIG_OF static const struct of_device_id ds1302_dt_ids[] = { { .compatible = "maxim,ds1302", }, @@ -208,7 +203,6 @@ static struct spi_driver ds1302_driver = { .driver.name = "rtc-ds1302", .driver.of_match_table = of_match_ptr(ds1302_dt_ids), .probe = ds1302_probe, - .remove = ds1302_remove, .id_table = ds1302_spi_ids, }; -- GitLab From eb633de6abcb3003a8a2c03377d39a91a8d57d20 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 19 Sep 2022 16:38:12 +0800 Subject: [PATCH 233/694] rtc: s3c: Switch to use dev_err_probe() helper In the probe path, dev_err() can be replace with dev_err_probe() which will check if error code is -EPROBE_DEFER and prints the error name. Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20220919083812.755082-1-yangyingliang@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s3c.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index db529733c9c46..8fc5efde3e0b3 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -429,14 +429,9 @@ static int s3c_rtc_probe(struct platform_device *pdev) return PTR_ERR(info->base); info->rtc_clk = devm_clk_get(&pdev->dev, "rtc"); - if (IS_ERR(info->rtc_clk)) { - ret = PTR_ERR(info->rtc_clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "failed to find rtc clock\n"); - else - dev_dbg(&pdev->dev, "probe deferred due to missing rtc clk\n"); - return ret; - } + if (IS_ERR(info->rtc_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(info->rtc_clk), + "failed to find rtc clock\n"); ret = clk_prepare_enable(info->rtc_clk); if (ret) return ret; -- GitLab From 97e78b64d138021c837a30bfa78201caf27c16b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Oct 2022 17:29:34 +0200 Subject: [PATCH 234/694] rtc: remove davinci rtc driver The Davinci dm365 SoC support was removed, so the rtc driver has no remaining users. Signed-off-by: Arnd Bergmann Acked-by: Bartosz Golaszewski Acked-by: Marc Zyngier Acked-by: Kevin Hilman Link: https://lore.kernel.org/r/20221019152947.3857217-9-arnd@kernel.org Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 10 - drivers/rtc/Makefile | 1 - drivers/rtc/rtc-davinci.c | 512 -------------------------------------- 3 files changed, 523 deletions(-) delete mode 100644 drivers/rtc/rtc-davinci.c diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index bb63edb507da4..b45fd08d51dc2 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1351,16 +1351,6 @@ config RTC_DRV_ASM9260 This driver can also be built as a module. If so, the module will be called rtc-asm9260. -config RTC_DRV_DAVINCI - tristate "TI DaVinci RTC" - depends on ARCH_DAVINCI_DM365 || COMPILE_TEST - help - If you say yes here you get support for the RTC on the - DaVinci platforms (DM365). - - This driver can also be built as a module. If so, the module - will be called rtc-davinci. 
- config RTC_DRV_DIGICOLOR tristate "Conexant Digicolor RTC" depends on ARCH_DIGICOLOR || COMPILE_TEST diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index aab22bc634321..791994eb913d9 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -44,7 +44,6 @@ obj-$(CONFIG_RTC_DRV_CROS_EC) += rtc-cros-ec.o obj-$(CONFIG_RTC_DRV_DA9052) += rtc-da9052.o obj-$(CONFIG_RTC_DRV_DA9055) += rtc-da9055.o obj-$(CONFIG_RTC_DRV_DA9063) += rtc-da9063.o -obj-$(CONFIG_RTC_DRV_DAVINCI) += rtc-davinci.o obj-$(CONFIG_RTC_DRV_DIGICOLOR) += rtc-digicolor.o obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c deleted file mode 100644 index 6bef0f2353da4..0000000000000 --- a/drivers/rtc/rtc-davinci.c +++ /dev/null @@ -1,512 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * DaVinci Power Management and Real Time Clock Driver for TI platforms - * - * Copyright (C) 2009 Texas Instruments, Inc - * - * Author: Miguel Aguilar - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * The DaVinci RTC is a simple RTC with the following - * Sec: 0 - 59 : BCD count - * Min: 0 - 59 : BCD count - * Hour: 0 - 23 : BCD count - * Day: 0 - 0x7FFF(32767) : Binary count ( Over 89 years ) - */ - -/* PRTC interface registers */ -#define DAVINCI_PRTCIF_PID 0x00 -#define PRTCIF_CTLR 0x04 -#define PRTCIF_LDATA 0x08 -#define PRTCIF_UDATA 0x0C -#define PRTCIF_INTEN 0x10 -#define PRTCIF_INTFLG 0x14 - -/* PRTCIF_CTLR bit fields */ -#define PRTCIF_CTLR_BUSY BIT(31) -#define PRTCIF_CTLR_SIZE BIT(25) -#define PRTCIF_CTLR_DIR BIT(24) -#define PRTCIF_CTLR_BENU_MSB BIT(23) -#define PRTCIF_CTLR_BENU_3RD_BYTE BIT(22) -#define PRTCIF_CTLR_BENU_2ND_BYTE BIT(21) -#define PRTCIF_CTLR_BENU_LSB BIT(20) -#define PRTCIF_CTLR_BENU_MASK (0x00F00000) -#define PRTCIF_CTLR_BENL_MSB BIT(19) -#define PRTCIF_CTLR_BENL_3RD_BYTE BIT(18) -#define PRTCIF_CTLR_BENL_2ND_BYTE BIT(17) -#define PRTCIF_CTLR_BENL_LSB BIT(16) -#define PRTCIF_CTLR_BENL_MASK (0x000F0000) - -/* PRTCIF_INTEN bit fields */ -#define PRTCIF_INTEN_RTCSS BIT(1) -#define PRTCIF_INTEN_RTCIF BIT(0) -#define PRTCIF_INTEN_MASK (PRTCIF_INTEN_RTCSS \ - | PRTCIF_INTEN_RTCIF) - -/* PRTCIF_INTFLG bit fields */ -#define PRTCIF_INTFLG_RTCSS BIT(1) -#define PRTCIF_INTFLG_RTCIF BIT(0) -#define PRTCIF_INTFLG_MASK (PRTCIF_INTFLG_RTCSS \ - | PRTCIF_INTFLG_RTCIF) - -/* PRTC subsystem registers */ -#define PRTCSS_RTC_INTC_EXTENA1 (0x0C) -#define PRTCSS_RTC_CTRL (0x10) -#define PRTCSS_RTC_WDT (0x11) -#define PRTCSS_RTC_TMR0 (0x12) -#define PRTCSS_RTC_TMR1 (0x13) -#define PRTCSS_RTC_CCTRL (0x14) -#define PRTCSS_RTC_SEC (0x15) -#define PRTCSS_RTC_MIN (0x16) -#define PRTCSS_RTC_HOUR (0x17) -#define PRTCSS_RTC_DAY0 (0x18) -#define PRTCSS_RTC_DAY1 (0x19) -#define PRTCSS_RTC_AMIN (0x1A) -#define PRTCSS_RTC_AHOUR (0x1B) -#define PRTCSS_RTC_ADAY0 (0x1C) -#define PRTCSS_RTC_ADAY1 (0x1D) -#define PRTCSS_RTC_CLKC_CNT (0x20) - -/* PRTCSS_RTC_INTC_EXTENA1 */ -#define PRTCSS_RTC_INTC_EXTENA1_MASK (0x07) - -/* PRTCSS_RTC_CTRL bit fields */ -#define PRTCSS_RTC_CTRL_WDTBUS BIT(7) -#define PRTCSS_RTC_CTRL_WEN BIT(6) -#define PRTCSS_RTC_CTRL_WDRT BIT(5) -#define PRTCSS_RTC_CTRL_WDTFLG BIT(4) -#define PRTCSS_RTC_CTRL_TE BIT(3) -#define PRTCSS_RTC_CTRL_TIEN BIT(2) -#define PRTCSS_RTC_CTRL_TMRFLG BIT(1) -#define PRTCSS_RTC_CTRL_TMMD BIT(0) - -/* PRTCSS_RTC_CCTRL bit fields */ -#define PRTCSS_RTC_CCTRL_CALBUSY BIT(7) -#define 
PRTCSS_RTC_CCTRL_DAEN BIT(5) -#define PRTCSS_RTC_CCTRL_HAEN BIT(4) -#define PRTCSS_RTC_CCTRL_MAEN BIT(3) -#define PRTCSS_RTC_CCTRL_ALMFLG BIT(2) -#define PRTCSS_RTC_CCTRL_AIEN BIT(1) -#define PRTCSS_RTC_CCTRL_CAEN BIT(0) - -static DEFINE_SPINLOCK(davinci_rtc_lock); - -struct davinci_rtc { - struct rtc_device *rtc; - void __iomem *base; - int irq; -}; - -static inline void rtcif_write(struct davinci_rtc *davinci_rtc, - u32 val, u32 addr) -{ - writel(val, davinci_rtc->base + addr); -} - -static inline u32 rtcif_read(struct davinci_rtc *davinci_rtc, u32 addr) -{ - return readl(davinci_rtc->base + addr); -} - -static inline void rtcif_wait(struct davinci_rtc *davinci_rtc) -{ - while (rtcif_read(davinci_rtc, PRTCIF_CTLR) & PRTCIF_CTLR_BUSY) - cpu_relax(); -} - -static inline void rtcss_write(struct davinci_rtc *davinci_rtc, - unsigned long val, u8 addr) -{ - rtcif_wait(davinci_rtc); - - rtcif_write(davinci_rtc, PRTCIF_CTLR_BENL_LSB | addr, PRTCIF_CTLR); - rtcif_write(davinci_rtc, val, PRTCIF_LDATA); - - rtcif_wait(davinci_rtc); -} - -static inline u8 rtcss_read(struct davinci_rtc *davinci_rtc, u8 addr) -{ - rtcif_wait(davinci_rtc); - - rtcif_write(davinci_rtc, PRTCIF_CTLR_DIR | PRTCIF_CTLR_BENL_LSB | addr, - PRTCIF_CTLR); - - rtcif_wait(davinci_rtc); - - return rtcif_read(davinci_rtc, PRTCIF_LDATA); -} - -static inline void davinci_rtcss_calendar_wait(struct davinci_rtc *davinci_rtc) -{ - while (rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL) & - PRTCSS_RTC_CCTRL_CALBUSY) - cpu_relax(); -} - -static irqreturn_t davinci_rtc_interrupt(int irq, void *class_dev) -{ - struct davinci_rtc *davinci_rtc = class_dev; - unsigned long events = 0; - u32 irq_flg; - u8 alm_irq, tmr_irq; - u8 rtc_ctrl, rtc_cctrl; - int ret = IRQ_NONE; - - irq_flg = rtcif_read(davinci_rtc, PRTCIF_INTFLG) & - PRTCIF_INTFLG_RTCSS; - - alm_irq = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL) & - PRTCSS_RTC_CCTRL_ALMFLG; - - tmr_irq = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL) & - PRTCSS_RTC_CTRL_TMRFLG; - - if (irq_flg) { - if (alm_irq) { - events |= RTC_IRQF | RTC_AF; - rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL); - rtc_cctrl |= PRTCSS_RTC_CCTRL_ALMFLG; - rtcss_write(davinci_rtc, rtc_cctrl, PRTCSS_RTC_CCTRL); - } else if (tmr_irq) { - events |= RTC_IRQF | RTC_PF; - rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL); - rtc_ctrl |= PRTCSS_RTC_CTRL_TMRFLG; - rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL); - } - - rtcif_write(davinci_rtc, PRTCIF_INTFLG_RTCSS, - PRTCIF_INTFLG); - rtc_update_irq(davinci_rtc->rtc, 1, events); - - ret = IRQ_HANDLED; - } - - return ret; -} - -static int -davinci_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - u8 rtc_ctrl; - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL); - - switch (cmd) { - case RTC_WIE_ON: - rtc_ctrl |= PRTCSS_RTC_CTRL_WEN | PRTCSS_RTC_CTRL_WDTFLG; - break; - case RTC_WIE_OFF: - rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN; - break; - default: - ret = -ENOIOCTLCMD; - } - - rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - return ret; -} - -static void convertfromdays(u16 days, struct rtc_time *tm) -{ - int tmp_days, year, mon; - - for (year = 2000;; year++) { - tmp_days = rtc_year_days(1, 12, year); - if (days >= tmp_days) - days -= tmp_days; - else { - for (mon = 0;; mon++) { - tmp_days = rtc_month_days(mon, year); - if (days >= tmp_days) { - days -= tmp_days; - 
} else { - tm->tm_year = year - 1900; - tm->tm_mon = mon; - tm->tm_mday = days + 1; - break; - } - } - break; - } - } -} - -static void convert2days(u16 *days, struct rtc_time *tm) -{ - int i; - *days = 0; - - for (i = 2000; i < 1900 + tm->tm_year; i++) - *days += rtc_year_days(1, 12, i); - - *days += rtc_year_days(tm->tm_mday, tm->tm_mon, 1900 + tm->tm_year); -} - -static int davinci_rtc_read_time(struct device *dev, struct rtc_time *tm) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - u16 days = 0; - u8 day0, day1; - unsigned long flags; - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - davinci_rtcss_calendar_wait(davinci_rtc); - tm->tm_sec = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_SEC)); - - davinci_rtcss_calendar_wait(davinci_rtc); - tm->tm_min = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_MIN)); - - davinci_rtcss_calendar_wait(davinci_rtc); - tm->tm_hour = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_HOUR)); - - davinci_rtcss_calendar_wait(davinci_rtc); - day0 = rtcss_read(davinci_rtc, PRTCSS_RTC_DAY0); - - davinci_rtcss_calendar_wait(davinci_rtc); - day1 = rtcss_read(davinci_rtc, PRTCSS_RTC_DAY1); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - days |= day1; - days <<= 8; - days |= day0; - - convertfromdays(days, tm); - - return 0; -} - -static int davinci_rtc_set_time(struct device *dev, struct rtc_time *tm) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - u16 days; - u8 rtc_cctrl; - unsigned long flags; - - convert2days(&days, tm); - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(tm->tm_sec), PRTCSS_RTC_SEC); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(tm->tm_min), PRTCSS_RTC_MIN); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(tm->tm_hour), PRTCSS_RTC_HOUR); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, days & 0xFF, PRTCSS_RTC_DAY0); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, (days & 0xFF00) >> 8, PRTCSS_RTC_DAY1); - - rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL); - rtc_cctrl |= PRTCSS_RTC_CCTRL_CAEN; - rtcss_write(davinci_rtc, rtc_cctrl, PRTCSS_RTC_CCTRL); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - return 0; -} - -static int davinci_rtc_alarm_irq_enable(struct device *dev, - unsigned int enabled) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - unsigned long flags; - u8 rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL); - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - if (enabled) - rtc_cctrl |= PRTCSS_RTC_CCTRL_DAEN | - PRTCSS_RTC_CCTRL_HAEN | - PRTCSS_RTC_CCTRL_MAEN | - PRTCSS_RTC_CCTRL_ALMFLG | - PRTCSS_RTC_CCTRL_AIEN; - else - rtc_cctrl &= ~PRTCSS_RTC_CCTRL_AIEN; - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, rtc_cctrl, PRTCSS_RTC_CCTRL); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - return 0; -} - -static int davinci_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - u16 days = 0; - u8 day0, day1; - unsigned long flags; - - alm->time.tm_sec = 0; - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - davinci_rtcss_calendar_wait(davinci_rtc); - alm->time.tm_min = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_AMIN)); - - davinci_rtcss_calendar_wait(davinci_rtc); - alm->time.tm_hour = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_AHOUR)); - - davinci_rtcss_calendar_wait(davinci_rtc); - 
day0 = rtcss_read(davinci_rtc, PRTCSS_RTC_ADAY0); - - davinci_rtcss_calendar_wait(davinci_rtc); - day1 = rtcss_read(davinci_rtc, PRTCSS_RTC_ADAY1); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - days |= day1; - days <<= 8; - days |= day0; - - convertfromdays(days, &alm->time); - - alm->pending = !!(rtcss_read(davinci_rtc, - PRTCSS_RTC_CCTRL) & - PRTCSS_RTC_CCTRL_AIEN); - alm->enabled = alm->pending && device_may_wakeup(dev); - - return 0; -} - -static int davinci_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) -{ - struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev); - unsigned long flags; - u16 days; - - convert2days(&days, &alm->time); - - spin_lock_irqsave(&davinci_rtc_lock, flags); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(alm->time.tm_min), PRTCSS_RTC_AMIN); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, bin2bcd(alm->time.tm_hour), PRTCSS_RTC_AHOUR); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, days & 0xFF, PRTCSS_RTC_ADAY0); - - davinci_rtcss_calendar_wait(davinci_rtc); - rtcss_write(davinci_rtc, (days & 0xFF00) >> 8, PRTCSS_RTC_ADAY1); - - spin_unlock_irqrestore(&davinci_rtc_lock, flags); - - return 0; -} - -static const struct rtc_class_ops davinci_rtc_ops = { - .ioctl = davinci_rtc_ioctl, - .read_time = davinci_rtc_read_time, - .set_time = davinci_rtc_set_time, - .alarm_irq_enable = davinci_rtc_alarm_irq_enable, - .read_alarm = davinci_rtc_read_alarm, - .set_alarm = davinci_rtc_set_alarm, -}; - -static int __init davinci_rtc_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct davinci_rtc *davinci_rtc; - int ret = 0; - - davinci_rtc = devm_kzalloc(&pdev->dev, sizeof(struct davinci_rtc), GFP_KERNEL); - if (!davinci_rtc) - return -ENOMEM; - - davinci_rtc->irq = platform_get_irq(pdev, 0); - if (davinci_rtc->irq < 0) - return davinci_rtc->irq; - - davinci_rtc->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(davinci_rtc->base)) - return PTR_ERR(davinci_rtc->base); - - platform_set_drvdata(pdev, davinci_rtc); - - davinci_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(davinci_rtc->rtc)) - return PTR_ERR(davinci_rtc->rtc); - - davinci_rtc->rtc->ops = &davinci_rtc_ops; - davinci_rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; - davinci_rtc->rtc->range_max = RTC_TIMESTAMP_BEGIN_2000 + (1 << 16) * 86400ULL - 1; - - rtcif_write(davinci_rtc, PRTCIF_INTFLG_RTCSS, PRTCIF_INTFLG); - rtcif_write(davinci_rtc, 0, PRTCIF_INTEN); - rtcss_write(davinci_rtc, 0, PRTCSS_RTC_INTC_EXTENA1); - - rtcss_write(davinci_rtc, 0, PRTCSS_RTC_CTRL); - rtcss_write(davinci_rtc, 0, PRTCSS_RTC_CCTRL); - - ret = devm_request_irq(dev, davinci_rtc->irq, davinci_rtc_interrupt, - 0, "davinci_rtc", davinci_rtc); - if (ret < 0) { - dev_err(dev, "unable to register davinci RTC interrupt\n"); - return ret; - } - - /* Enable interrupts */ - rtcif_write(davinci_rtc, PRTCIF_INTEN_RTCSS, PRTCIF_INTEN); - rtcss_write(davinci_rtc, PRTCSS_RTC_INTC_EXTENA1_MASK, - PRTCSS_RTC_INTC_EXTENA1); - - rtcss_write(davinci_rtc, PRTCSS_RTC_CCTRL_CAEN, PRTCSS_RTC_CCTRL); - - device_init_wakeup(&pdev->dev, 0); - - return devm_rtc_register_device(davinci_rtc->rtc); -} - -static int __exit davinci_rtc_remove(struct platform_device *pdev) -{ - struct davinci_rtc *davinci_rtc = platform_get_drvdata(pdev); - - device_init_wakeup(&pdev->dev, 0); - - rtcif_write(davinci_rtc, 0, PRTCIF_INTEN); - - return 0; -} - -static struct platform_driver davinci_rtc_driver = { - .remove = 
__exit_p(davinci_rtc_remove), - .driver = { - .name = "rtc_davinci", - }, -}; - -module_platform_driver_probe(davinci_rtc_driver, davinci_rtc_probe); - -MODULE_AUTHOR("Miguel Aguilar "); -MODULE_DESCRIPTION("Texas Instruments DaVinci PRTC Driver"); -MODULE_LICENSE("GPL"); -- GitLab From 1ff56edf137a2f034fee9b9a398ddcd8cb7a5a34 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua Date: Tue, 6 Sep 2022 22:30:37 +0800 Subject: [PATCH 235/694] rtc: fsl-ftm-alarm: Use module_platform_driver replace device_initcall The ftm_rtc_driver has been registered while module init, however there is not unregister step for module exit, now use the macro module_platform_driver replace device_initcall, which can register and unregister platform driver automatically. Signed-off-by: Zhang Jianhua Link: https://lore.kernel.org/r/20220906143037.1455317-1-chris.zjh@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-fsl-ftm-alarm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-fsl-ftm-alarm.c b/drivers/rtc/rtc-fsl-ftm-alarm.c index c0df49fb978ce..3d7c4077fe1c6 100644 --- a/drivers/rtc/rtc-fsl-ftm-alarm.c +++ b/drivers/rtc/rtc-fsl-ftm-alarm.c @@ -327,12 +327,7 @@ static struct platform_driver ftm_rtc_driver = { }, }; -static int __init ftm_alarm_init(void) -{ - return platform_driver_register(&ftm_rtc_driver); -} - -device_initcall(ftm_alarm_init); +module_platform_driver(ftm_rtc_driver); MODULE_DESCRIPTION("NXP/Freescale FlexTimer alarm driver"); MODULE_AUTHOR("Biwen Li "); -- GitLab From c69bffe199270ce001d5764985a8e414c7e05fee Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 24 Oct 2022 18:55:49 +0200 Subject: [PATCH 236/694] dt-bindings: rtc: convert hym8563 bindings to json-schema Convert RTC binding for Haoyu Microelectronics HYM8563 to Device Tree Schema format. Signed-off-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221024165549.74574-7-sebastian.reichel@collabora.com Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/haoyu,hym8563.txt | 30 ---------- .../bindings/rtc/haoyu,hym8563.yaml | 56 +++++++++++++++++++ 2 files changed, 56 insertions(+), 30 deletions(-) delete mode 100644 Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt create mode 100644 Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt deleted file mode 100644 index a8934fe2ab4c1..0000000000000 --- a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt +++ /dev/null @@ -1,30 +0,0 @@ -Haoyu Microelectronics HYM8563 Real Time Clock - -The HYM8563 provides basic rtc and alarm functionality -as well as a clock output of up to 32kHz. - -Required properties: -- compatible: should be: "haoyu,hym8563" -- reg: i2c address -- #clock-cells: the value should be 0 - -Optional properties: -- clock-output-names: From common clock binding -- interrupts: rtc alarm/event interrupt - -Example: - -hym8563: hym8563@51 { - compatible = "haoyu,hym8563"; - reg = <0x51>; - - interrupts = <13 IRQ_TYPE_EDGE_FALLING>; - - #clock-cells = <0>; -}; - -device { -... - clocks = <&hym8563>; -... 
-}; diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml new file mode 100644 index 0000000000000..0b9f39ef0edc3 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/haoyu,hym8563.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Haoyu Microelectronics HYM8563 RTC + +maintainers: + - Alexandre Belloni + +properties: + compatible: + const: haoyu,hym8563 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + "#clock-cells": + const: 0 + + clock-output-names: + description: From common clock binding to override the default output clock name. + maxItems: 1 + + wakeup-source: + description: Enables wake up of host system on alarm. + +allOf: + - $ref: rtc.yaml + +unevaluatedProperties: false + +required: + - compatible + - reg + - "#clock-cells" + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + interrupts = <13 IRQ_TYPE_EDGE_FALLING>; + #clock-cells = <0>; + }; + }; -- GitLab From f8513363b0b7155afaee09cca777fc608dc957e3 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Mon, 5 Sep 2022 09:01:19 +0000 Subject: [PATCH 237/694] rtc: s35390a: Remove the unneeded result variable Return the value s35390a_set_reg() directly instead of storing it in another redundant variable. Reported-by: Zeal Robot Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/20220905090119.335121-1-ye.xingchen@zte.com.cn Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-s35390a.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index 81d97b1d31591..b18daaf72b17d 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -211,7 +211,7 @@ static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct i2c_client *client = to_i2c_client(dev); struct s35390a *s35390a = i2c_get_clientdata(client); - int i, err; + int i; char buf[7], status; dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d mday=%d, " @@ -234,9 +234,7 @@ static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm) for (i = 0; i < 7; ++i) buf[i] = bitrev8(buf[i]); - err = s35390a_set_reg(s35390a, S35390A_CMD_TIME1, buf, sizeof(buf)); - - return err; + return s35390a_set_reg(s35390a, S35390A_CMD_TIME1, buf, sizeof(buf)); } static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm) -- GitLab From 8d816c1eaa752546fa3221f5027af0a667ff0c8f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 4 Nov 2022 12:02:25 +0100 Subject: [PATCH 238/694] rtc: isl12022: add support for temperature sensor The isl12022 has built-in temperature compensation effective over the range -40C to +85C. It exposes the average of the last two temperature measurements as a 10-bit value in half-Kelvins. Make this available via the hwmon framework. 
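A worked example of the conversion introduced below, using an illustrative raw value rather than one from the patch:

    /* 10-bit reading in half-Kelvins -> millidegrees Celsius */
    int  temp = 595;                    /* example raw value            */
    long mC   = temp * 500 - 273000;    /* = 24500, i.e. 24.5 degrees C */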
Reviewed-by: Guenter Roeck Signed-off-by: Rasmus Villemoes Link: https://lore.kernel.org/r/20221104110225.2219761-1-linux@rasmusvillemoes.dk Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-isl12022.c | 94 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c index ca677c4265e6c..a3b0de3393f57 100644 --- a/drivers/rtc/rtc-isl12022.c +++ b/drivers/rtc/rtc-isl12022.c @@ -17,6 +17,7 @@ #include #include #include +#include /* ISL register offsets */ #define ISL12022_REG_SC 0x00 @@ -30,6 +31,9 @@ #define ISL12022_REG_SR 0x07 #define ISL12022_REG_INT 0x08 +#define ISL12022_REG_BETA 0x0d +#define ISL12022_REG_TEMP_L 0x28 + /* ISL register bits */ #define ISL12022_HR_MIL (1 << 7) /* military or 24 hour time */ @@ -38,6 +42,7 @@ #define ISL12022_INT_WRTC (1 << 6) +#define ISL12022_BETA_TSE (1 << 7) static struct i2c_driver isl12022_driver; @@ -46,6 +51,93 @@ struct isl12022 { struct regmap *regmap; }; +static umode_t isl12022_hwmon_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + if (type == hwmon_temp && attr == hwmon_temp_input) + return 0444; + + return 0; +} + +/* + * A user-initiated temperature conversion is not started by this function, + * so the temperature is updated once every ~60 seconds. + */ +static int isl12022_hwmon_read_temp(struct device *dev, long *mC) +{ + struct isl12022 *isl12022 = dev_get_drvdata(dev); + struct regmap *regmap = isl12022->regmap; + u8 temp_buf[2]; + int temp, ret; + + ret = regmap_bulk_read(regmap, ISL12022_REG_TEMP_L, + temp_buf, sizeof(temp_buf)); + if (ret) + return ret; + /* + * Temperature is represented as a 10-bit number, unit half-Kelvins. + */ + temp = (temp_buf[1] << 8) | temp_buf[0]; + temp *= 500; + temp -= 273000; + + *mC = temp; + + return 0; +} + +static int isl12022_hwmon_read(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + if (type == hwmon_temp && attr == hwmon_temp_input) + return isl12022_hwmon_read_temp(dev, val); + + return -EOPNOTSUPP; +} + +static const struct hwmon_channel_info *isl12022_hwmon_info[] = { + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), + NULL +}; + +static const struct hwmon_ops isl12022_hwmon_ops = { + .is_visible = isl12022_hwmon_is_visible, + .read = isl12022_hwmon_read, +}; + +static const struct hwmon_chip_info isl12022_hwmon_chip_info = { + .ops = &isl12022_hwmon_ops, + .info = isl12022_hwmon_info, +}; + +static void isl12022_hwmon_register(struct device *dev) +{ + struct isl12022 *isl12022; + struct device *hwmon; + int ret; + + if (!IS_REACHABLE(CONFIG_HWMON)) + return; + + isl12022 = dev_get_drvdata(dev); + + ret = regmap_update_bits(isl12022->regmap, ISL12022_REG_BETA, + ISL12022_BETA_TSE, ISL12022_BETA_TSE); + if (ret) { + dev_warn(dev, "unable to enable temperature sensor\n"); + return; + } + + hwmon = devm_hwmon_device_register_with_info(dev, "isl12022", isl12022, + &isl12022_hwmon_chip_info, + NULL); + if (IS_ERR(hwmon)) + dev_warn(dev, "unable to register hwmon device: %pe\n", hwmon); +} + /* * In the routines that deal directly with the isl12022 hardware, we use * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch. 
@@ -160,6 +252,8 @@ static int isl12022_probe(struct i2c_client *client) return PTR_ERR(isl12022->regmap); } + isl12022_hwmon_register(&client->dev); + isl12022->rtc = devm_rtc_allocate_device(&client->dev); if (IS_ERR(isl12022->rtc)) return PTR_ERR(isl12022->rtc); -- GitLab From e59b3c730b44f042540319d62cba73054fd928c8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 6 Nov 2022 09:00:51 +0100 Subject: [PATCH 239/694] rtc: Include when appropriate The kstrto() functions have been moved from kernel.h to kstrtox.h. So, include the latter directly in the appropriate files. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/786421fd0435a32206288904a1f879436a717529.1667721637.git.christophe.jaillet@wanadoo.fr Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-abx80x.c | 1 + drivers/rtc/rtc-bq32k.c | 1 + drivers/rtc/rtc-ds1307.c | 1 + drivers/rtc/rtc-rv3029c2.c | 1 + drivers/rtc/rtc-rx8025.c | 1 + drivers/rtc/sysfs.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index e7f325ced9402..2e0e6432901b8 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c index 6d6a55efb9cc7..967ddc6bf76d6 100644 --- a/drivers/rtc/rtc-bq32k.c +++ b/drivers/rtc/rtc-bq32k.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index d51565bcc1896..7c2276cf5514a 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c index eb483a30bd92f..e4fdd47ae066c 100644 --- a/drivers/rtc/rtc-rv3029c2.c +++ b/drivers/rtc/rtc-rv3029c2.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* Register map */ diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index 77d3cb08b5ec2..331c20d4d843c 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c index 00f1945bcb7e1..e3062c4d3f2ce 100644 --- a/drivers/rtc/sysfs.c +++ b/drivers/rtc/sysfs.c @@ -6,6 +6,7 @@ * Author: Alessandro Zummo */ +#include #include #include -- GitLab From 4dfe05bdc1ade79b943d4979a2e2a8b5ef68fbb5 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 27 Oct 2022 17:32:49 +0100 Subject: [PATCH 240/694] rtc: ds1347: fix value written to century register In `ds1347_set_time()`, the wrong value is being written to the `DS1347_CENTURY_REG` register. It needs to be converted to BCD. Fix it. 
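A worked example of the fix (the year is chosen for illustration): for 2022, tm_year is 122, so century = 122 / 100 + 19 = 20. Writing 20 unconverted stores 0x14, which the chip's BCD register reads back as "14"; bin2bcd(20) stores the intended 0x20:

    regmap_write(map, DS1347_CENTURY_REG, 20);           /* wrong: 0x14 on the wire */
    regmap_write(map, DS1347_CENTURY_REG, bin2bcd(20));  /* right: 0x20, century 20 */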
Fixes: 147dae76dbb9 ("rtc: ds1347: handle century register") Cc: # v5.5+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20221027163249.447416-1-abbotti@mev.co.uk Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1347.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c index 157bf5209ac40..a40c1a52df659 100644 --- a/drivers/rtc/rtc-ds1347.c +++ b/drivers/rtc/rtc-ds1347.c @@ -112,7 +112,7 @@ static int ds1347_set_time(struct device *dev, struct rtc_time *dt) return err; century = (dt->tm_year / 100) + 19; - err = regmap_write(map, DS1347_CENTURY_REG, century); + err = regmap_write(map, DS1347_CENTURY_REG, bin2bcd(century)); if (err) return err; -- GitLab From 60da73808298ff2cfa9f165d55eb3d7aa7078601 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 10 Nov 2022 17:08:10 +0800 Subject: [PATCH 241/694] rtc: class: Fix potential memleak in devm_rtc_allocate_device() devm_rtc_allocate_device() will alloc a rtc_device first, and then run dev_set_name(). If dev_set_name() failed, the rtc_device will memleak. Move devm_add_action_or_reset() in front of dev_set_name() to prevent memleak. unreferenced object 0xffff888110a53000 (size 2048): comm "python3", pid 470, jiffies 4296078308 (age 58.882s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 08 30 a5 10 81 88 ff ff .........0...... 08 30 a5 10 81 88 ff ff 00 00 00 00 00 00 00 00 .0.............. backtrace: [<000000004aac0364>] kmalloc_trace+0x21/0x110 [<000000000ff02202>] devm_rtc_allocate_device+0xd4/0x400 [<000000001bdf5639>] devm_rtc_device_register+0x1a/0x80 [<00000000351bf81c>] rx4581_probe+0xdd/0x110 [rtc_rx4581] [<00000000f0eba0ae>] spi_probe+0xde/0x130 [<00000000bff89ee8>] really_probe+0x175/0x3f0 [<00000000128e8d84>] __driver_probe_device+0xe6/0x170 [<00000000ee5bf913>] device_driver_attach+0x32/0x80 [<00000000f3f28f92>] bind_store+0x10b/0x1a0 [<000000009ff812d8>] drv_attr_store+0x49/0x70 [<000000008139c323>] sysfs_kf_write+0x8d/0xb0 [<00000000b6146e01>] kernfs_fop_write_iter+0x214/0x2d0 [<00000000ecbe3895>] vfs_write+0x61a/0x7d0 [<00000000aa2196ea>] ksys_write+0xc8/0x190 [<0000000046a600f5>] do_syscall_64+0x37/0x90 [<00000000541a336f>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 24d23181e43d ("rtc: class: check return value when calling dev_set_name()") Signed-off-by: Shang XiaoJing Reviewed-by: Yang Yingliang Link: https://lore.kernel.org/r/20221110090810.11225-1-shangxiaojing@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/class.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index e48223c00c672..e5b7b48cffac0 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -374,11 +374,11 @@ struct rtc_device *devm_rtc_allocate_device(struct device *dev) rtc->id = id; rtc->dev.parent = dev; - err = dev_set_name(&rtc->dev, "rtc%d", id); + err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc); if (err) return ERR_PTR(err); - err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc); + err = dev_set_name(&rtc->dev, "rtc%d", id); if (err) return ERR_PTR(err); -- GitLab From 508ccdfb86b21da37ad091003a4d4567709d5dfb Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 9 Nov 2022 13:07:08 +0100 Subject: [PATCH 242/694] rtc: cmos: Call cmos_wake_setup() from cmos_do_probe() Notice that cmos_wake_setup() is the only user of acpi_rtc_info and it can operate on the cmos_rtc variable directly, so it need not set the platform_data pointer before cmos_do_probe() is called. Instead, it can be called by cmos_do_probe() in the case when the platform_data pointer is not set to implement the default behavior (which is to use the FADT information as long as ACPI support is enabled). Modify the code accordingly. While at it, drop a comment that doesn't really match the code it is supposed to be describing. Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/4803444.31r3eYUQgx@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 47 ++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 58cc2bae2f8a0..a84262265d6d9 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -744,6 +744,8 @@ static irqreturn_t cmos_interrupt(int irq, void *p) return IRQ_NONE; } +static void cmos_wake_setup(struct device *dev); + #ifdef CONFIG_PNP #define INITSECTION @@ -827,19 +829,27 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) if (info->address_space) address_space = info->address_space; - if (info->rtc_day_alarm && info->rtc_day_alarm < 128) - cmos_rtc.day_alrm = info->rtc_day_alarm; - if (info->rtc_mon_alarm && info->rtc_mon_alarm < 128) - cmos_rtc.mon_alrm = info->rtc_mon_alarm; - if (info->rtc_century && info->rtc_century < 128) - cmos_rtc.century = info->rtc_century; + cmos_rtc.day_alrm = info->rtc_day_alarm; + cmos_rtc.mon_alrm = info->rtc_mon_alarm; + cmos_rtc.century = info->rtc_century; if (info->wake_on && info->wake_off) { cmos_rtc.wake_on = info->wake_on; cmos_rtc.wake_off = info->wake_off; } + } else { + cmos_wake_setup(dev); } + if (cmos_rtc.day_alrm >= 128) + cmos_rtc.day_alrm = 0; + + if (cmos_rtc.mon_alrm >= 128) + cmos_rtc.mon_alrm = 0; + + if (cmos_rtc.century >= 128) + cmos_rtc.century = 0; + cmos_rtc.dev = dev; dev_set_drvdata(dev, &cmos_rtc); @@ -1275,13 +1285,6 @@ static void use_acpi_alarm_quirks(void) static inline void use_acpi_alarm_quirks(void) { } #endif -/* Every ACPI platform has a mc146818 compatible "cmos rtc". Here we find - * its device node and pass extra config data. This helps its driver use - * capabilities that the now-obsolete mc146818 didn't have, and informs it - * that this board's RTC is wakeup-capable (per ACPI spec). - */ -static struct cmos_rtc_board_info acpi_rtc_info; - static void cmos_wake_setup(struct device *dev) { if (acpi_disabled) @@ -1289,26 +1292,23 @@ static void cmos_wake_setup(struct device *dev) use_acpi_alarm_quirks(); - acpi_rtc_info.wake_on = rtc_wake_on; - acpi_rtc_info.wake_off = rtc_wake_off; + cmos_rtc.wake_on = rtc_wake_on; + cmos_rtc.wake_off = rtc_wake_off; - /* workaround bug in some ACPI tables */ + /* ACPI tables bug workaround. 
*/ if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) { dev_dbg(dev, "bogus FADT month_alarm (%d)\n", acpi_gbl_FADT.month_alarm); acpi_gbl_FADT.month_alarm = 0; } - acpi_rtc_info.rtc_day_alarm = acpi_gbl_FADT.day_alarm; - acpi_rtc_info.rtc_mon_alarm = acpi_gbl_FADT.month_alarm; - acpi_rtc_info.rtc_century = acpi_gbl_FADT.century; + cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm; + cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm; + cmos_rtc.century = acpi_gbl_FADT.century; - /* NOTE: S4_RTC_WAKE is NOT currently useful to Linux */ if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE) dev_info(dev, "RTC can wake from S4\n"); - dev->platform_data = &acpi_rtc_info; - /* RTC always wakes from S1/S2/S3, and often S4/STD */ device_init_wakeup(dev, 1); } @@ -1359,8 +1359,6 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) { int irq, ret; - cmos_wake_setup(&pnp->dev); - if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) { irq = 0; #ifdef CONFIG_X86 @@ -1468,7 +1466,6 @@ static int __init cmos_platform_probe(struct platform_device *pdev) int irq, ret; cmos_of_init(pdev); - cmos_wake_setup(&pdev->dev); if (RTC_IOMAPPED) resource = platform_get_resource(pdev, IORESOURCE_IO, 0); -- GitLab From 375bbba09692fe4c5218eddee8e312dd733fa846 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Nov 2022 13:09:07 +0100 Subject: [PATCH 243/694] rtc: cmos: Call rtc_wake_setup() from cmos_do_probe() To reduce code duplication, move the invocation of rtc_wake_setup() into cmos_do_probe() and simplify the callers of the latter. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/2143522.irdbgypaU6@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index a84262265d6d9..583116994a37f 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -744,6 +744,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p) return IRQ_NONE; } +static inline void rtc_wake_setup(struct device *dev); static void cmos_wake_setup(struct device *dev); #ifdef CONFIG_PNP @@ -938,6 +939,13 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) nvmem_cfg.size = address_space - NVRAM_OFFSET; devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg); + /* + * Everything has gone well so far, so by default register a handler for + * the ACPI RTC fixed event. + */ + if (!info) + rtc_wake_setup(dev); + dev_info(dev, "%s%s, %d bytes nvram%s\n", !is_valid_irq(rtc_irq) ? "no alarms" : cmos_rtc.mon_alrm ? 
"alarms up to one year" : @@ -1357,7 +1365,7 @@ static void rtc_wake_setup(struct device *dev) static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) { - int irq, ret; + int irq; if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) { irq = 0; @@ -1373,13 +1381,7 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) irq = pnp_irq(pnp, 0); } - ret = cmos_do_probe(&pnp->dev, pnp_get_resource(pnp, IORESOURCE_IO, 0), irq); - if (ret) - return ret; - - rtc_wake_setup(&pnp->dev); - - return 0; + return cmos_do_probe(&pnp->dev, pnp_get_resource(pnp, IORESOURCE_IO, 0), irq); } static void cmos_pnp_remove(struct pnp_dev *pnp) @@ -1463,7 +1465,7 @@ static inline void cmos_of_init(struct platform_device *pdev) {} static int __init cmos_platform_probe(struct platform_device *pdev) { struct resource *resource; - int irq, ret; + int irq; cmos_of_init(pdev); @@ -1475,13 +1477,7 @@ static int __init cmos_platform_probe(struct platform_device *pdev) if (irq < 0) irq = -1; - ret = cmos_do_probe(&pdev->dev, resource, irq); - if (ret) - return ret; - - rtc_wake_setup(&pdev->dev); - - return 0; + return cmos_do_probe(&pdev->dev, resource, irq); } static int cmos_platform_remove(struct platform_device *pdev) -- GitLab From dca4d3b71c8a09a16951add656711fbd6f5bfbb0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Nov 2022 13:09:32 +0100 Subject: [PATCH 244/694] rtc: cmos: Eliminate forward declarations of some functions Reorder the ACPI-related code before cmos_do_probe() so as to eliminate excessive forward declarations of some functions. While at it, for consistency, add the inline modifier to the definitions of empty stub static funtions and remove it from the corresponding definitions of functions with non-empty bodies. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/13157911.uLZWGnKmhe@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 304 ++++++++++++++++++++--------------------- 1 file changed, 149 insertions(+), 155 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 583116994a37f..2a21d8281aa61 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -744,8 +744,155 @@ static irqreturn_t cmos_interrupt(int irq, void *p) return IRQ_NONE; } -static inline void rtc_wake_setup(struct device *dev); -static void cmos_wake_setup(struct device *dev); +#ifdef CONFIG_ACPI + +#include + +static u32 rtc_handler(void *context) +{ + struct device *dev = context; + struct cmos_rtc *cmos = dev_get_drvdata(dev); + unsigned char rtc_control = 0; + unsigned char rtc_intr; + unsigned long flags; + + + /* + * Always update rtc irq when ACPI is used as RTC Alarm. + * Or else, ACPI SCI is enabled during suspend/resume only, + * update rtc irq in that case. + */ + if (cmos_use_acpi_alarm()) + cmos_interrupt(0, (void *)cmos->rtc); + else { + /* Fix me: can we use cmos_interrupt() here as well? 
*/ + spin_lock_irqsave(&rtc_lock, flags); + if (cmos_rtc.suspend_ctrl) + rtc_control = CMOS_READ(RTC_CONTROL); + if (rtc_control & RTC_AIE) { + cmos_rtc.suspend_ctrl &= ~RTC_AIE; + CMOS_WRITE(rtc_control, RTC_CONTROL); + rtc_intr = CMOS_READ(RTC_INTR_FLAGS); + rtc_update_irq(cmos->rtc, 1, rtc_intr); + } + spin_unlock_irqrestore(&rtc_lock, flags); + } + + pm_wakeup_hard_event(dev); + acpi_clear_event(ACPI_EVENT_RTC); + acpi_disable_event(ACPI_EVENT_RTC, 0); + return ACPI_INTERRUPT_HANDLED; +} + +static void rtc_wake_setup(struct device *dev) +{ + if (acpi_disabled) + return; + + acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev); + /* + * After the RTC handler is installed, the Fixed_RTC event should + * be disabled. Only when the RTC alarm is set will it be enabled. + */ + acpi_clear_event(ACPI_EVENT_RTC); + acpi_disable_event(ACPI_EVENT_RTC, 0); +} + +static void rtc_wake_on(struct device *dev) +{ + acpi_clear_event(ACPI_EVENT_RTC); + acpi_enable_event(ACPI_EVENT_RTC, 0); +} + +static void rtc_wake_off(struct device *dev) +{ + acpi_disable_event(ACPI_EVENT_RTC, 0); +} + +#ifdef CONFIG_X86 +/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ +static void use_acpi_alarm_quirks(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + if (!is_hpet_enabled()) + return; + + if (dmi_get_bios_year() < 2015) + return; + + use_acpi_alarm = true; +} +#else +static inline void use_acpi_alarm_quirks(void) { } +#endif + +static void cmos_wake_setup(struct device *dev) +{ + if (acpi_disabled) + return; + + use_acpi_alarm_quirks(); + + cmos_rtc.wake_on = rtc_wake_on; + cmos_rtc.wake_off = rtc_wake_off; + + /* ACPI tables bug workaround. */ + if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) { + dev_dbg(dev, "bogus FADT month_alarm (%d)\n", + acpi_gbl_FADT.month_alarm); + acpi_gbl_FADT.month_alarm = 0; + } + + cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm; + cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm; + cmos_rtc.century = acpi_gbl_FADT.century; + + if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE) + dev_info(dev, "RTC can wake from S4\n"); + + /* RTC always wakes from S1/S2/S3, and often S4/STD */ + device_init_wakeup(dev, 1); +} + +static void cmos_check_acpi_rtc_status(struct device *dev, + unsigned char *rtc_control) +{ + struct cmos_rtc *cmos = dev_get_drvdata(dev); + acpi_event_status rtc_status; + acpi_status status; + + if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC) + return; + + status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status); + if (ACPI_FAILURE(status)) { + dev_err(dev, "Could not get RTC status\n"); + } else if (rtc_status & ACPI_EVENT_FLAG_SET) { + unsigned char mask; + *rtc_control &= ~RTC_AIE; + CMOS_WRITE(*rtc_control, RTC_CONTROL); + mask = CMOS_READ(RTC_INTR_FLAGS); + rtc_update_irq(cmos->rtc, 1, mask); + } +} + +#else /* !CONFIG_ACPI */ + +static inline void rtc_wake_setup(struct device *dev) +{ +} + +static inline void cmos_wake_setup(struct device *dev) +{ +} + +static inline void cmos_check_acpi_rtc_status(struct device *dev, + unsigned char *rtc_control) +{ +} +#endif /* CONFIG_ACPI */ #ifdef CONFIG_PNP #define INITSECTION @@ -1140,9 +1287,6 @@ static void cmos_check_wkalrm(struct device *dev) } } -static void cmos_check_acpi_rtc_status(struct device *dev, - unsigned char *rtc_control); - static int __maybe_unused cmos_resume(struct device *dev) { struct cmos_rtc *cmos = dev_get_drvdata(dev); @@ -1209,156 +1353,6 @@ static SIMPLE_DEV_PM_OPS(cmos_pm_ops, cmos_suspend, cmos_resume); * predate even PNPBIOS 
should set up platform_bus devices. */ -#ifdef CONFIG_ACPI - -#include - -static u32 rtc_handler(void *context) -{ - struct device *dev = context; - struct cmos_rtc *cmos = dev_get_drvdata(dev); - unsigned char rtc_control = 0; - unsigned char rtc_intr; - unsigned long flags; - - - /* - * Always update rtc irq when ACPI is used as RTC Alarm. - * Or else, ACPI SCI is enabled during suspend/resume only, - * update rtc irq in that case. - */ - if (cmos_use_acpi_alarm()) - cmos_interrupt(0, (void *)cmos->rtc); - else { - /* Fix me: can we use cmos_interrupt() here as well? */ - spin_lock_irqsave(&rtc_lock, flags); - if (cmos_rtc.suspend_ctrl) - rtc_control = CMOS_READ(RTC_CONTROL); - if (rtc_control & RTC_AIE) { - cmos_rtc.suspend_ctrl &= ~RTC_AIE; - CMOS_WRITE(rtc_control, RTC_CONTROL); - rtc_intr = CMOS_READ(RTC_INTR_FLAGS); - rtc_update_irq(cmos->rtc, 1, rtc_intr); - } - spin_unlock_irqrestore(&rtc_lock, flags); - } - - pm_wakeup_hard_event(dev); - acpi_clear_event(ACPI_EVENT_RTC); - acpi_disable_event(ACPI_EVENT_RTC, 0); - return ACPI_INTERRUPT_HANDLED; -} - -static inline void rtc_wake_setup(struct device *dev) -{ - if (acpi_disabled) - return; - - acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev); - /* - * After the RTC handler is installed, the Fixed_RTC event should - * be disabled. Only when the RTC alarm is set will it be enabled. - */ - acpi_clear_event(ACPI_EVENT_RTC); - acpi_disable_event(ACPI_EVENT_RTC, 0); -} - -static void rtc_wake_on(struct device *dev) -{ - acpi_clear_event(ACPI_EVENT_RTC); - acpi_enable_event(ACPI_EVENT_RTC, 0); -} - -static void rtc_wake_off(struct device *dev) -{ - acpi_disable_event(ACPI_EVENT_RTC, 0); -} - -#ifdef CONFIG_X86 -/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ -static void use_acpi_alarm_quirks(void) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return; - - if (!is_hpet_enabled()) - return; - - if (dmi_get_bios_year() < 2015) - return; - - use_acpi_alarm = true; -} -#else -static inline void use_acpi_alarm_quirks(void) { } -#endif - -static void cmos_wake_setup(struct device *dev) -{ - if (acpi_disabled) - return; - - use_acpi_alarm_quirks(); - - cmos_rtc.wake_on = rtc_wake_on; - cmos_rtc.wake_off = rtc_wake_off; - - /* ACPI tables bug workaround. 
*/ - if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) { - dev_dbg(dev, "bogus FADT month_alarm (%d)\n", - acpi_gbl_FADT.month_alarm); - acpi_gbl_FADT.month_alarm = 0; - } - - cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm; - cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm; - cmos_rtc.century = acpi_gbl_FADT.century; - - if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE) - dev_info(dev, "RTC can wake from S4\n"); - - /* RTC always wakes from S1/S2/S3, and often S4/STD */ - device_init_wakeup(dev, 1); -} - -static void cmos_check_acpi_rtc_status(struct device *dev, - unsigned char *rtc_control) -{ - struct cmos_rtc *cmos = dev_get_drvdata(dev); - acpi_event_status rtc_status; - acpi_status status; - - if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC) - return; - - status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status); - if (ACPI_FAILURE(status)) { - dev_err(dev, "Could not get RTC status\n"); - } else if (rtc_status & ACPI_EVENT_FLAG_SET) { - unsigned char mask; - *rtc_control &= ~RTC_AIE; - CMOS_WRITE(*rtc_control, RTC_CONTROL); - mask = CMOS_READ(RTC_INTR_FLAGS); - rtc_update_irq(cmos->rtc, 1, mask); - } -} - -#else - -static void cmos_wake_setup(struct device *dev) -{ -} - -static void cmos_check_acpi_rtc_status(struct device *dev, - unsigned char *rtc_control) -{ -} - -static void rtc_wake_setup(struct device *dev) -{ -} -#endif - #ifdef CONFIG_PNP #include -- GitLab From d13e9ad9f5146f066a5c5a1cc993d09e4fb21ead Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Nov 2022 13:12:00 +0100 Subject: [PATCH 245/694] rtc: cmos: Rename ACPI-related functions The names of rtc_wake_setup() and cmos_wake_setup() don't indicate that these functions are ACPI-related, which is the case, and the former doesn't really reflect the role of the function. Rename them to acpi_rtc_event_setup() and acpi_cmos_wake_setup(), respectively, to address this shortcoming. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/3225614.44csPzL39Z@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 2a21d8281aa61..039486bfedf48 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -784,7 +784,7 @@ static u32 rtc_handler(void *context) return ACPI_INTERRUPT_HANDLED; } -static void rtc_wake_setup(struct device *dev) +static void acpi_rtc_event_setup(struct device *dev) { if (acpi_disabled) return; @@ -828,7 +828,7 @@ static void use_acpi_alarm_quirks(void) static inline void use_acpi_alarm_quirks(void) { } #endif -static void cmos_wake_setup(struct device *dev) +static void acpi_cmos_wake_setup(struct device *dev) { if (acpi_disabled) return; @@ -880,11 +880,11 @@ static void cmos_check_acpi_rtc_status(struct device *dev, #else /* !CONFIG_ACPI */ -static inline void rtc_wake_setup(struct device *dev) +static inline void acpi_rtc_event_setup(struct device *dev) { } -static inline void cmos_wake_setup(struct device *dev) +static inline void acpi_cmos_wake_setup(struct device *dev) { } @@ -986,7 +986,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) cmos_rtc.wake_off = info->wake_off; } } else { - cmos_wake_setup(dev); + acpi_cmos_wake_setup(dev); } if (cmos_rtc.day_alrm >= 128) @@ -1091,7 +1091,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) * the ACPI RTC fixed event. 
*/ if (!info) - rtc_wake_setup(dev); + acpi_rtc_event_setup(dev); dev_info(dev, "%s%s, %d bytes nvram%s\n", !is_valid_irq(rtc_irq) ? "no alarms" : -- GitLab From 83ebb7b3036d151ee39a4a752018665648fc3bd4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Nov 2022 13:15:36 +0100 Subject: [PATCH 246/694] rtc: cmos: Disable ACPI RTC event on removal Make cmos_do_remove() drop the ACPI RTC fixed event handler so as to prevent it from operating on stale data in case the event triggers after driver removal. Fixes: 311ee9c151ad ("rtc: cmos: allow using ACPI for RTC alarm instead of HPET") Signed-off-by: Rafael J. Wysocki Reviewed-by: Zhang Rui Tested-by: Zhang Rui Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/2224609.iZASKD2KPV@kreacher Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 039486bfedf48..00e2ca7374ecf 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -798,6 +798,14 @@ static void acpi_rtc_event_setup(struct device *dev) acpi_disable_event(ACPI_EVENT_RTC, 0); } +static void acpi_rtc_event_cleanup(void) +{ + if (acpi_disabled) + return; + + acpi_remove_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler); +} + static void rtc_wake_on(struct device *dev) { acpi_clear_event(ACPI_EVENT_RTC); @@ -884,6 +892,10 @@ static inline void acpi_rtc_event_setup(struct device *dev) { } +static inline void acpi_rtc_event_cleanup(void) +{ +} + static inline void acpi_cmos_wake_setup(struct device *dev) { } @@ -1138,6 +1150,9 @@ static void cmos_do_remove(struct device *dev) hpet_unregister_irq_handler(cmos_interrupt); } + if (!dev_get_platdata(dev)) + acpi_rtc_event_cleanup(); + cmos->rtc = NULL; ports = cmos->iomem; -- GitLab From 0462681e207ccc44778a77b3297af728b1cf5b9f Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Sun, 6 Nov 2022 12:59:15 +0100 Subject: [PATCH 247/694] rtc: snvs: Allow a time difference on clock register read On an iMX6ULL the following message appears when a wakealarm is set: echo 0 > /sys/class/rtc/rtc1/wakealarm rtc rtc1: Timeout trying to get valid LPSRT Counter read This does not always happen but is reproducible quite often (7 out of 10 times). The problem appears because the iMX6ULL is not able to read the registers within one 32kHz clock cycle which is the base clock of the RTC. Therefore, this patch allows a difference of up to 320 cycles (10ms). 10ms was chosen to be big enough even on systems with less cpu power (e.g. iMX6ULL). According to the reference manual a difference is fine: - If the two consecutive reads are similar, the value is correct. The values have to be similar, not equal. Fixes: cd7f3a249dbe ("rtc: snvs: Add timeouts to avoid kernel lockups") Reviewed-by: Francesco Dolcini Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Link: https://lore.kernel.org/r/20221106115915.7930-1-francesco@dolcini.it Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-snvs.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index bd929b0e7d7de..d82acf1af1fae 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -32,6 +32,14 @@ #define SNVS_LPPGDR_INIT 0x41736166 #define CNTR_TO_SECS_SH 15 +/* The maximum RTC clock cycles that are allowed to pass between two + * consecutive clock counter register reads. If the values are corrupted a + * bigger difference is expected. 
The RTC frequency is 32kHz. With 320 cycles + * we end at 10ms which should be enough for most cases. If it once takes + * longer than expected we do a retry. + */ +#define MAX_RTC_READ_DIFF_CYCLES 320 + struct snvs_rtc_data { struct rtc_device *rtc; struct regmap *regmap; @@ -56,6 +64,7 @@ static u64 rtc_read_lpsrt(struct snvs_rtc_data *data) static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) { u64 read1, read2; + s64 diff; unsigned int timeout = 100; /* As expected, the registers might update between the read of the LSB @@ -66,7 +75,8 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) do { read2 = read1; read1 = rtc_read_lpsrt(data); - } while (read1 != read2 && --timeout); + diff = read1 - read2; + } while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout); if (!timeout) dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); @@ -78,13 +88,15 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) static int rtc_read_lp_counter_lsb(struct snvs_rtc_data *data, u32 *lsb) { u32 count1, count2; + s32 diff; unsigned int timeout = 100; regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); do { count2 = count1; regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); - } while (count1 != count2 && --timeout); + diff = count1 - count2; + } while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout); if (!timeout) { dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); return -ETIMEDOUT; -- GitLab From 60cfac17d0a1c28cd41959e95ba1e0ecc47165e7 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 1 Mar 2022 14:12:20 +0100 Subject: [PATCH 248/694] rtc: pcf8563: clear RTC_FEATURE_ALARM if no irq If there is no IRQ hooked up, clear RTC_FEATURE_ALARM to make the core ensure that userspace is made aware that alarms are not supported. Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20220301131220.4011810-1-vincent.whitchurch@axis.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf8563.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 11fa9788558be..0a7fd94784651 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -567,6 +567,8 @@ static int pcf8563_probe(struct i2c_client *client) client->irq); return err; } + } else { + clear_bit(RTC_FEATURE_ALARM, pcf8563->rtc->features); } err = devm_rtc_register_device(pcf8563->rtc); -- GitLab From eec79501cce6e8965e92174760c6a9e92d78a038 Mon Sep 17 00:00:00 2001 From: Riwen Lu Date: Wed, 10 Aug 2022 15:01:09 +0800 Subject: [PATCH 249/694] rtc: efi: Add wakeup support Add wakeup support for rtc-efi, so we can wakeup from S3/S4/S5 through rtcwake. 
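For illustration only (not part of the patch): once the device is marked wakeup-capable, a wake alarm can be armed from userspace in roughly the way rtcwake does it, via the RTC_WKALM_SET ioctl. A minimal sketch follows; it assumes /dev/rtc0 is the EFI RTC, and the actual suspend step and wrap-around past midnight are left out for brevity.

  /* Arm a wake alarm about one minute ahead on /dev/rtc0 (illustrative sketch). */
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <linux/rtc.h>

  int main(void)
  {
          struct rtc_wkalrm alm = { 0 };
          int fd = open("/dev/rtc0", O_RDONLY);

          if (fd < 0 || ioctl(fd, RTC_RD_TIME, &alm.time) < 0) {
                  perror("rtc");
                  return 1;
          }

          alm.time.tm_min += 1;                /* fire in about a minute */
          if (alm.time.tm_min >= 60) {         /* handle wrap past the hour */
                  alm.time.tm_min -= 60;
                  alm.time.tm_hour = (alm.time.tm_hour + 1) % 24;
          }
          alm.enabled = 1;

          if (ioctl(fd, RTC_WKALM_SET, &alm) < 0)
                  perror("RTC_WKALM_SET");

          close(fd);
          return 0;
  }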
Signed-off-by: Riwen Lu Link: https://lore.kernel.org/r/TYWP286MB260191455377CEBD2336557EB1659@TYWP286MB2601.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-efi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index 11850c2880ad4..e991cccdb6e9c 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -271,6 +271,8 @@ static int __init efi_rtc_probe(struct platform_device *dev) clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); set_bit(RTC_FEATURE_ALARM_WAKEUP_ONLY, rtc->features); + device_init_wakeup(&dev->dev, true); + return devm_rtc_register_device(rtc); } -- GitLab From fe0157ba679dc95407dd5eae6550a4ceaea75040 Mon Sep 17 00:00:00 2001 From: paulmn Date: Mon, 29 Aug 2022 14:46:39 +0200 Subject: [PATCH 250/694] rtc: pcf8523: fix for stop bit Fix an issue seen when the gold-cap backup capacitor gets fully discharged by a long absence of the power supply and then charges up again: the RTC then failed to keep time. This was caused by incorrect handling of the STOP bit in the RTC's internal control register. Signed-off-by: paulmn Link: https://lore.kernel.org/r/20220829124639.10906-1-paulmn@axis.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf8523.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 6174b3fd4b985..92de99f11a7a5 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -99,24 +99,24 @@ static irqreturn_t pcf8523_irq(int irq, void *dev_id) static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm) { struct pcf8523 *pcf8523 = dev_get_drvdata(dev); - u8 regs[7]; + u8 regs[10]; int err; - err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_SECONDS, regs, + err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_CONTROL1, regs, sizeof(regs)); if (err < 0) return err; - if (regs[0] & PCF8523_SECONDS_OS) + if ((regs[0] & PCF8523_CONTROL1_STOP) || (regs[3] & PCF8523_SECONDS_OS)) return -EINVAL; - tm->tm_sec = bcd2bin(regs[0] & 0x7f); - tm->tm_min = bcd2bin(regs[1] & 0x7f); - tm->tm_hour = bcd2bin(regs[2] & 0x3f); - tm->tm_mday = bcd2bin(regs[3] & 0x3f); - tm->tm_wday = regs[4] & 0x7; - tm->tm_mon = bcd2bin(regs[5] & 0x1f) - 1; - tm->tm_year = bcd2bin(regs[6]) + 100; + tm->tm_sec = bcd2bin(regs[3] & 0x7f); + tm->tm_min = bcd2bin(regs[4] & 0x7f); + tm->tm_hour = bcd2bin(regs[5] & 0x3f); + tm->tm_mday = bcd2bin(regs[6] & 0x3f); + tm->tm_wday = regs[7] & 0x7; + tm->tm_mon = bcd2bin(regs[8] & 0x1f) - 1; + tm->tm_year = bcd2bin(regs[9]) + 100; return 0; } -- GitLab From a6ceee26fd5ed9b5bd37322b1ca88e4548cee4a3 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 21 Sep 2022 09:41:41 +0200 Subject: [PATCH 251/694] rtc: pcf85063: Fix reading alarm If the alarms are disabled, the topmost bit (AEN_*) is set in the alarm registers. These AEN_* bits are then also interpreted as part of the BCD value, leading to this warning: rtc rtc0: invalid alarm value: 2022-09-21T80:80:80 Fix this by masking out the alarm-enable and reserved bits. 
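To see why an unmasked alarm register decodes to the bogus "80" fields in the warning above, here is a small self-contained sketch; bcd2bin() is re-implemented locally so it builds outside the kernel, and 0x80 stands in for a register with its AEN_* bit set.

  #include <stdio.h>

  /* Local stand-in for the kernel's bcd2bin() helper. */
  static unsigned int bcd2bin(unsigned char val)
  {
          return (val & 0x0f) + (val >> 4) * 10;
  }

  int main(void)
  {
          unsigned char reg = 0x80;   /* alarm disabled: AEN bit set, BCD field 0 */

          printf("unmasked: %u\n", bcd2bin(reg));          /* 80 -> "T80:80:80" */
          printf("masked:   %u\n", bcd2bin(reg & 0x7f));   /* 0 */
          return 0;
  }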
Fixes: 05cb3a56ee8c ("rtc: pcf85063: add alarm support") Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20220921074141.3903104-1-alexander.stein@ew.tq-group.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 095891999da11..99f9cc57c7b30 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -169,10 +169,10 @@ static int pcf85063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) if (ret) return ret; - alrm->time.tm_sec = bcd2bin(buf[0]); - alrm->time.tm_min = bcd2bin(buf[1]); - alrm->time.tm_hour = bcd2bin(buf[2]); - alrm->time.tm_mday = bcd2bin(buf[3]); + alrm->time.tm_sec = bcd2bin(buf[0] & 0x7f); + alrm->time.tm_min = bcd2bin(buf[1] & 0x7f); + alrm->time.tm_hour = bcd2bin(buf[2] & 0x3f); + alrm->time.tm_mday = bcd2bin(buf[3] & 0x3f); ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &val); if (ret) -- GitLab From 1c137323e9a2a970b4a5bf8cf3c50e0ea1cefbeb Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:43 +0530 Subject: [PATCH 252/694] crypto: vmx: Skip objtool from running on aesp8-ppc.o With objtool enabled, below warnings are seen when trying to build: drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: aes_p8_set_encrypt_key+0x44: unannotated intra-function call drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: .text+0x2448: unannotated intra-function call drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: .text+0x2d68: unannotated intra-function call Skip objtool from running on drivers/crypto/vmx/aesp8-ppc.o file for the following reasons: - Since this file comes from OpenSSL, and since it is a perl file which generates a .S file, it may not be the best choice to make too many code changes to such files, unless absolutely necessary. - As far as the objtool --mcount functionality is concerned, we do not have to run objtool on this file because there are no calls to _mcount(). Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-6-sv@linux.ibm.com --- drivers/crypto/vmx/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index 2560cfea1dec2..7b41f0da6807f 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -9,3 +9,5 @@ targets += aesp8-ppc.S ghashp8-ppc.S $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE $(call if_changed,perl) + +OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y -- GitLab From 2da37761671b5bdedbe04e6469cfa57cd6b6ae45 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Nov 2022 23:27:44 +0530 Subject: [PATCH 253/694] powerpc/32: Fix objtool unannotated intra-function call warnings Fix several annotations in assembly files on PPC32. [Sathvika Vasireddy: Changed subject line and removed Kconfig change to enable objtool, as it is a part of "objtool/powerpc: Enable objtool to be built on ppc" patch in this series.] Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Acked-by: Josh Poimboeuf Signed-off-by: Christophe Leroy Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-7-sv@linux.ibm.com --- arch/powerpc/kernel/cpu_setup_6xx.S | 26 ++++++++++++------ arch/powerpc/kernel/cpu_setup_e500.S | 8 ++++-- arch/powerpc/kernel/entry_32.S | 9 ++++-- arch/powerpc/kernel/head_40x.S | 5 +++- arch/powerpc/kernel/head_85xx.S | 5 +++- arch/powerpc/kernel/head_8xx.S | 5 +++- arch/powerpc/kernel/head_book3s_32.S | 29 ++++++++++++++------ arch/powerpc/kernel/swsusp_32.S | 5 +++- arch/powerpc/kvm/fpu.S | 17 ++++++++---- arch/powerpc/platforms/52xx/lite5200_sleep.S | 15 +++++++--- 10 files changed, 89 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f8b5ff64b604f..f29ce3dd6140f 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -4,6 +4,8 @@ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) */ +#include + #include #include #include @@ -81,7 +83,7 @@ _GLOBAL(__setup_cpu_745x) blr /* Enable caches for 603's, 604, 750 & 7400 */ -setup_common_caches: +SYM_FUNC_START_LOCAL(setup_common_caches) mfspr r11,SPRN_HID0 andi. r0,r11,HID0_DCE ori r11,r11,HID0_ICE|HID0_DCE @@ -95,11 +97,12 @@ setup_common_caches: sync isync blr +SYM_FUNC_END(setup_common_caches) /* 604, 604e, 604ev, ... * Enable superscalar execution & branch history table */ -setup_604_hid0: +SYM_FUNC_START_LOCAL(setup_604_hid0) mfspr r11,SPRN_HID0 ori r11,r11,HID0_SIED|HID0_BHTE ori r8,r11,HID0_BTCD @@ -110,6 +113,7 @@ setup_604_hid0: sync isync blr +SYM_FUNC_END(setup_604_hid0) /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some * erratas we work around here. @@ -125,13 +129,14 @@ setup_604_hid0: * needed once we have applied workaround #5 (though it's * not set by Apple's firmware at least). */ -setup_7400_workarounds: +SYM_FUNC_START_LOCAL(setup_7400_workarounds) mfpvr r3 rlwinm r3,r3,0,20,31 cmpwi 0,r3,0x0207 ble 1f blr -setup_7410_workarounds: +SYM_FUNC_END(setup_7400_workarounds) +SYM_FUNC_START_LOCAL(setup_7410_workarounds) mfpvr r3 rlwinm r3,r3,0,20,31 cmpwi 0,r3,0x0100 @@ -151,6 +156,7 @@ setup_7410_workarounds: sync isync blr +SYM_FUNC_END(setup_7410_workarounds) /* 740/750/7400/7410 * Enable Store Gathering (SGE), Address Broadcast (ABE), @@ -158,7 +164,7 @@ setup_7410_workarounds: * Dynamic Power Management (DPM), Speculative (SPD) * Clear Instruction cache throttling (ICTC) */ -setup_750_7400_hid0: +SYM_FUNC_START_LOCAL(setup_750_7400_hid0) mfspr r11,SPRN_HID0 ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC oris r11,r11,HID0_DPM@h @@ -177,12 +183,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) sync isync blr +SYM_FUNC_END(setup_750_7400_hid0) /* 750cx specific * Looks like we have to disable NAP feature for some PLL settings... * (waiting for confirmation) */ -setup_750cx: +SYM_FUNC_START_LOCAL(setup_750cx) mfspr r10, SPRN_HID1 rlwinm r10,r10,4,28,31 cmpwi cr0,r10,7 @@ -196,11 +203,13 @@ setup_750cx: andc r6,r6,r7 stw r6,CPU_SPEC_FEATURES(r4) blr +SYM_FUNC_END(setup_750cx) /* 750fx specific */ -setup_750fx: +SYM_FUNC_START_LOCAL(setup_750fx) blr +SYM_FUNC_END(setup_750fx) /* MPC 745x * Enable Store Gathering (SGE), Branch Folding (FOLD) @@ -212,7 +221,7 @@ setup_750fx: * Clear Instruction cache throttling (ICTC) * Enable L2 HW prefetch */ -setup_745x_specifics: +SYM_FUNC_START_LOCAL(setup_745x_specifics) /* We check for the presence of an L3 cache setup by * the firmware. 
If any, we disable NAP capability as * it's known to be bogus on rev 2.1 and earlier @@ -270,6 +279,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) sync isync blr +SYM_FUNC_END(setup_745x_specifics) /* * Initialize the FPU registers. This is needed to work around an errata diff --git a/arch/powerpc/kernel/cpu_setup_e500.S b/arch/powerpc/kernel/cpu_setup_e500.S index 2ab25161b0adc..077cfccc3461e 100644 --- a/arch/powerpc/kernel/cpu_setup_e500.S +++ b/arch/powerpc/kernel/cpu_setup_e500.S @@ -8,6 +8,8 @@ * Benjamin Herrenschmidt */ +#include + #include #include #include @@ -274,7 +276,7 @@ _GLOBAL(flush_dcache_L1) blr -has_L2_cache: +SYM_FUNC_START_LOCAL(has_L2_cache) /* skip L2 cache on P2040/P2040E as they have no L2 cache */ mfspr r3, SPRN_SVR /* shift right by 8 bits and clear E bit of SVR */ @@ -290,9 +292,10 @@ has_L2_cache: 1: li r3, 0 blr +SYM_FUNC_END(has_L2_cache) /* flush backside L2 cache */ -flush_backside_L2_cache: +SYM_FUNC_START_LOCAL(flush_backside_L2_cache) mflr r10 bl has_L2_cache mtlr r10 @@ -313,6 +316,7 @@ flush_backside_L2_cache: bne 1b 2: blr +SYM_FUNC_END(flush_backside_L2_cache) _GLOBAL(cpu_down_flush_e500v2) mflr r0 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3fc7c9886bb70..5e0763be15490 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -18,6 +18,8 @@ #include #include #include +#include + #include #include #include @@ -74,17 +76,18 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */ #if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) - .globl __kuep_lock -__kuep_lock: +SYM_FUNC_START(__kuep_lock) lwz r9, THREAD+THSR0(r2) update_user_segments_by_4 r9, r10, r11, r12 blr +SYM_FUNC_END(__kuep_lock) -__kuep_unlock: +SYM_FUNC_START_LOCAL(__kuep_unlock) lwz r9, THREAD+THSR0(r2) rlwinm r9,r9,0,~SR_NX update_user_segments_by_4 r9, r10, r11, r12 blr +SYM_FUNC_END(__kuep_unlock) .macro kuep_lock bl __kuep_lock diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 088f500896c78..9110fe9d6747c 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -28,6 +28,8 @@ #include #include #include +#include + #include #include #include @@ -662,7 +664,7 @@ start_here: * kernel initialization. This maps the first 32 MBytes of memory 1:1 * virtual to physical and more importantly sets the cache mode. */ -initial_mmu: +SYM_FUNC_START_LOCAL(initial_mmu) tlbia /* Invalidate all TLB entries */ isync @@ -711,6 +713,7 @@ initial_mmu: mtspr SPRN_EVPR,r0 blr +SYM_FUNC_END(initial_mmu) _GLOBAL(abort) mfspr r13,SPRN_DBCR0 diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 52c0ab416326a..6be3cc36b716c 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -29,6 +29,8 @@ #include #include #include +#include + #include #include #include @@ -885,7 +887,7 @@ KernelSPE: * Translate the effec addr in r3 to phys addr. 
The phys addr will be put * into r3(higher 32bit) and r4(lower 32bit) */ -get_phys_addr: +SYM_FUNC_START_LOCAL(get_phys_addr) mfmsr r8 mfspr r9,SPRN_PID rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */ @@ -907,6 +909,7 @@ get_phys_addr: mfspr r3,SPRN_MAS7 #endif blr +SYM_FUNC_END(get_phys_addr) /* * Global functions diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0b05f2be66b9f..c94ed5a08c935 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -18,6 +18,8 @@ #include #include #include +#include + #include #include #include @@ -625,7 +627,7 @@ start_here: * 24 Mbytes of data, and the 512k IMMR space. Anything not covered by * these mappings is mapped by page tables. */ -initial_mmu: +SYM_FUNC_START_LOCAL(initial_mmu) li r8, 0 mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ lis r10, MD_TWAM@h @@ -686,6 +688,7 @@ initial_mmu: #endif mtspr SPRN_DER, r8 blr +SYM_FUNC_END(initial_mmu) _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 519b606951675..4af12447dc0b6 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -18,6 +18,8 @@ #include #include +#include + #include #include #include @@ -877,7 +879,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ -early_hash_table: +SYM_FUNC_START_LOCAL(early_hash_table) sync /* Force all PTE updates to finish */ isync tlbia /* Clear all TLB entries */ @@ -888,8 +890,9 @@ early_hash_table: ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 blr +SYM_FUNC_END(early_hash_table) -load_up_mmu: +SYM_FUNC_START_LOCAL(load_up_mmu) sync /* Force all PTE updates to finish */ isync tlbia /* Clear all TLB entries */ @@ -918,6 +921,7 @@ BEGIN_MMU_FTR_SECTION LOAD_BAT(7,r3,r4,r5) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +SYM_FUNC_END(load_up_mmu) _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ @@ -1028,7 +1032,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) * this makes sure it's done. * -- Cort */ -clear_bats: +SYM_FUNC_START_LOCAL(clear_bats) li r10,0 mtspr SPRN_DBAT0U,r10 @@ -1072,6 +1076,7 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_IBAT7L,r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +SYM_FUNC_END(clear_bats) _GLOBAL(update_bats) lis r4, 1f@h @@ -1108,15 +1113,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtspr SPRN_SRR1, r6 rfi -flush_tlbs: +SYM_FUNC_START_LOCAL(flush_tlbs) lis r10, 0x40 1: addic. r10, r10, -0x1000 tlbie r10 bgt 1b sync blr +SYM_FUNC_END(flush_tlbs) -mmu_off: +SYM_FUNC_START_LOCAL(mmu_off) addi r4, r3, __after_mmu_off - _start mfmsr r3 andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? 
*/ @@ -1128,9 +1134,10 @@ mmu_off: mtspr SPRN_SRR1,r3 sync rfi +SYM_FUNC_END(mmu_off) /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ -initial_bats: +SYM_FUNC_START_LOCAL(initial_bats) lis r11,PAGE_OFFSET@h tophys(r8,r11) #ifdef CONFIG_SMP @@ -1146,9 +1153,10 @@ initial_bats: mtspr SPRN_IBAT0U,r11 isync blr +SYM_FUNC_END(initial_bats) #ifdef CONFIG_BOOTX_TEXT -setup_disp_bat: +SYM_FUNC_START_LOCAL(setup_disp_bat) /* * setup the display bat prepared for us in prom.c */ @@ -1164,10 +1172,11 @@ setup_disp_bat: mtspr SPRN_DBAT3L,r8 mtspr SPRN_DBAT3U,r11 blr +SYM_FUNC_END(setup_disp_bat) #endif /* CONFIG_BOOTX_TEXT */ #ifdef CONFIG_PPC_EARLY_DEBUG_CPM -setup_cpm_bat: +SYM_FUNC_START_LOCAL(setup_cpm_bat) lis r8, 0xf000 ori r8, r8, 0x002a mtspr SPRN_DBAT1L, r8 @@ -1177,10 +1186,11 @@ setup_cpm_bat: mtspr SPRN_DBAT1U, r11 blr +SYM_FUNC_END(setup_cpm_bat) #endif #ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO -setup_usbgecko_bat: +SYM_FUNC_START_LOCAL(setup_usbgecko_bat) /* prepare a BAT for early io */ #if defined(CONFIG_GAMECUBE) lis r8, 0x0c00 @@ -1199,6 +1209,7 @@ setup_usbgecko_bat: mtspr SPRN_DBAT1L, r8 mtspr SPRN_DBAT1U, r11 blr +SYM_FUNC_END(setup_usbgecko_bat) #endif .data diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index e0cbd63007f21..ffb79326483c0 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include + #include #include #include @@ -400,7 +402,7 @@ _ASM_NOKPROBE_SYMBOL(swsusp_arch_resume) /* FIXME:This construct is actually not useful since we don't shut * down the instruction MMU, we could just flip back MSR-DR on. */ -turn_on_mmu: +SYM_FUNC_START_LOCAL(turn_on_mmu) mflr r4 mtsrr0 r4 mtsrr1 r3 @@ -408,4 +410,5 @@ turn_on_mmu: isync rfi _ASM_NOKPROBE_SYMBOL(turn_on_mmu) +SYM_FUNC_END(turn_on_mmu) diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index 315c94946bad1..b68e7f26a81f4 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -6,6 +6,8 @@ */ #include +#include + #include #include #include @@ -110,18 +112,22 @@ FPS_THREE_IN(fsel) * R8 = (double*)¶m3 [load_three] * LR = instruction call function */ -fpd_load_three: +SYM_FUNC_START_LOCAL(fpd_load_three) lfd 2,0(r8) /* load param3 */ -fpd_load_two: +SYM_FUNC_START_LOCAL(fpd_load_two) lfd 1,0(r7) /* load param2 */ -fpd_load_one: +SYM_FUNC_START_LOCAL(fpd_load_one) lfd 0,0(r6) /* load param1 */ -fpd_load_none: +SYM_FUNC_START_LOCAL(fpd_load_none) lfd 3,0(r3) /* load up fpscr value */ MTFSF_L(3) lwz r6, 0(r4) /* load cr */ mtcr r6 blr +SYM_FUNC_END(fpd_load_none) +SYM_FUNC_END(fpd_load_one) +SYM_FUNC_END(fpd_load_two) +SYM_FUNC_END(fpd_load_three) /* * End of double instruction processing @@ -131,13 +137,14 @@ fpd_load_none: * R5 = (double*)&result * LR = caller of instruction call function */ -fpd_return: +SYM_FUNC_START_LOCAL(fpd_return) mfcr r6 stfd 0,0(r5) /* save result */ mffs 0 stfd 0,0(r3) /* save new fpscr value */ stw r6,0(r4) /* save new cr value */ blr +SYM_FUNC_END(fpd_return) /* * Double operation with no input operand diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index afee8b1515a8e..0b12647e7b420 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include + #include #include #include @@ -178,7 +180,8 @@ sram_code: /* local udelay in sram is needed */ - udelay: /* r11 - 
tb_ticks_per_usec, r12 - usecs, overwrites r13 */ +SYM_FUNC_START_LOCAL(udelay) + /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12, r12, r11 mftb r13 /* start */ add r12, r13, r12 /* end */ @@ -187,6 +190,7 @@ sram_code: cmp cr0, r13, r12 blt 1b blr +SYM_FUNC_END(udelay) sram_code_end: @@ -271,7 +275,7 @@ _ASM_NOKPROBE_SYMBOL(lite5200_wakeup) SAVE_SR(n+2, addr+2); \ SAVE_SR(n+3, addr+3); -save_regs: +SYM_FUNC_START_LOCAL(save_regs) stw r0, 0(r4) stw r1, 0x4(r4) stw r2, 0x8(r4) @@ -317,6 +321,7 @@ save_regs: SAVE_SPRN(TBRU, 0x5b) blr +SYM_FUNC_END(save_regs) /* restore registers */ @@ -336,7 +341,7 @@ save_regs: LOAD_SR(n+2, addr+2); \ LOAD_SR(n+3, addr+3); -restore_regs: +SYM_FUNC_START_LOCAL(restore_regs) lis r4, registers@h ori r4, r4, registers@l @@ -393,6 +398,7 @@ restore_regs: blr _ASM_NOKPROBE_SYMBOL(restore_regs) +SYM_FUNC_END(restore_regs) @@ -403,7 +409,7 @@ _ASM_NOKPROBE_SYMBOL(restore_regs) * Flush data cache * Do this by just reading lots of stuff into the cache. */ -flush_data_cache: +SYM_FUNC_START_LOCAL(flush_data_cache) lis r3,CONFIG_KERNEL_START@h ori r3,r3,CONFIG_KERNEL_START@l li r4,NUM_CACHE_LINES @@ -413,3 +419,4 @@ flush_data_cache: addi r3,r3,L1_CACHE_BYTES /* Next line, please */ bdnz 1b blr +SYM_FUNC_END(flush_data_cache) -- GitLab From d0160bd5d389da247fb5affb6a35ea393d22fedb Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:45 +0530 Subject: [PATCH 254/694] powerpc/vdso: Skip objtool from running on VDSO files Do not run objtool on VDSO files, by using OBJECT_FILES_NON_STANDARD. Suggested-by: Christophe Leroy Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-8-sv@linux.ibm.com --- arch/powerpc/kernel/vdso/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index a2e7b0ce5b191..6a977b0d8ffc3 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -102,3 +102,5 @@ quiet_cmd_vdso64ld_and_check = VDSO64L $@ cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< + +OBJECT_FILES_NON_STANDARD := y -- GitLab From efb11fdb3e1a9f694fa12b70b21e69e55ec59c36 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Nov 2022 23:27:46 +0530 Subject: [PATCH 255/694] objtool: Fix SEGFAULT find_insn() will return NULL in case of failure. Check insn in order to avoid a kernel Oops for NULL pointer dereference. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Acked-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-9-sv@linux.ibm.com --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 43ec14c29a60c..8427af808221b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -207,7 +207,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, return false; insn = find_insn(file, func->sec, func->offset); - if (!insn->func) + if (!insn || !insn->func) return false; func_for_each_insn(file, func, insn) { -- GitLab From 0646c28b417b7fe307c9da72ca1c508e43b57dc0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Nov 2022 23:27:47 +0530 Subject: [PATCH 256/694] objtool: Use target file endianness instead of a compiled constant Some architectures like powerpc support both endianness, it's therefore not possible to fix the endianness via arch/endianness.h because there is no easy way to get the target endianness at build time. Use the endianness recorded in the file objtool is working on. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Acked-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-10-sv@linux.ibm.com --- .../arch/x86/include/arch/endianness.h | 9 ------ tools/objtool/check.c | 2 +- tools/objtool/include/objtool/endianness.h | 32 +++++++++---------- tools/objtool/orc_dump.c | 11 +++++-- tools/objtool/orc_gen.c | 4 +-- tools/objtool/special.c | 3 +- 6 files changed, 30 insertions(+), 31 deletions(-) delete mode 100644 tools/objtool/arch/x86/include/arch/endianness.h diff --git a/tools/objtool/arch/x86/include/arch/endianness.h b/tools/objtool/arch/x86/include/arch/endianness.h deleted file mode 100644 index 7c362527da205..0000000000000 --- a/tools/objtool/arch/x86/include/arch/endianness.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ARCH_ENDIANNESS_H -#define _ARCH_ENDIANNESS_H - -#include - -#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN - -#endif /* _ARCH_ENDIANNESS_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8427af808221b..ad5dab1757014 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2100,7 +2100,7 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - cfi.cfa.offset = bswap_if_needed(hint->sp_offset); + cfi.cfa.offset = bswap_if_needed(file->elf, hint->sp_offset); cfi.type = hint->type; cfi.end = hint->end; diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h index 10241341eff35..4d2aa9b0fe2fd 100644 --- a/tools/objtool/include/objtool/endianness.h +++ b/tools/objtool/include/objtool/endianness.h @@ -2,33 +2,33 @@ #ifndef _OBJTOOL_ENDIANNESS_H #define _OBJTOOL_ENDIANNESS_H -#include #include #include - -#ifndef __TARGET_BYTE_ORDER -#error undefined arch __TARGET_BYTE_ORDER -#endif - -#if __BYTE_ORDER != __TARGET_BYTE_ORDER -#define __NEED_BSWAP 1 -#else -#define __NEED_BSWAP 0 -#endif +#include /* - * Does a byte swap if target endianness doesn't match the host, i.e. cross + * Does a byte swap if target file endianness doesn't match the host, i.e. cross * compilation for little endian on big endian and vice versa. 
* To be used for multi-byte values conversion, which are read from / about * to be written to a target native endianness ELF file. */ -#define bswap_if_needed(val) \ +static inline bool need_bswap(struct elf *elf) +{ + return (__BYTE_ORDER == __LITTLE_ENDIAN) ^ + (elf->ehdr.e_ident[EI_DATA] == ELFDATA2LSB); +} + +#define bswap_if_needed(elf, val) \ ({ \ __typeof__(val) __ret; \ + bool __need_bswap = need_bswap(elf); \ switch (sizeof(val)) { \ - case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break; \ - case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break; \ - case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break; \ + case 8: \ + __ret = __need_bswap ? bswap_64(val) : (val); break; \ + case 4: \ + __ret = __need_bswap ? bswap_32(val) : (val); break; \ + case 2: \ + __ret = __need_bswap ? bswap_16(val) : (val); break; \ default: \ BUILD_BUG(); break; \ } \ diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index f5a8508c42d6d..4f1211fec82ce 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -76,6 +76,7 @@ int orc_dump(const char *_objname) GElf_Rela rela; GElf_Sym sym; Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL; + struct elf dummy_elf = {}; objname = _objname; @@ -94,6 +95,12 @@ int orc_dump(const char *_objname) return -1; } + if (!elf64_getehdr(elf)) { + WARN_ELF("elf64_getehdr"); + return -1; + } + memcpy(&dummy_elf.ehdr, elf64_getehdr(elf), sizeof(dummy_elf.ehdr)); + if (elf_getshdrnum(elf, &nr_sections)) { WARN_ELF("elf_getshdrnum"); return -1; @@ -198,11 +205,11 @@ int orc_dump(const char *_objname) printf(" sp:"); - print_reg(orc[i].sp_reg, bswap_if_needed(orc[i].sp_offset)); + print_reg(orc[i].sp_reg, bswap_if_needed(&dummy_elf, orc[i].sp_offset)); printf(" bp:"); - print_reg(orc[i].bp_reg, bswap_if_needed(orc[i].bp_offset)); + print_reg(orc[i].bp_reg, bswap_if_needed(&dummy_elf, orc[i].bp_offset)); printf(" type:%s end:%d\n", orc_type_name(orc[i].type), orc[i].end); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index dd3c64af9db23..1f22b7ebae588 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -97,8 +97,8 @@ static int write_orc_entry(struct elf *elf, struct section *orc_sec, /* populate ORC data */ orc = (struct orc_entry *)orc_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); - orc->sp_offset = bswap_if_needed(orc->sp_offset); - orc->bp_offset = bswap_if_needed(orc->bp_offset); + orc->sp_offset = bswap_if_needed(elf, orc->sp_offset); + orc->bp_offset = bswap_if_needed(elf, orc->bp_offset); /* populate reloc for ip */ if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32, diff --git a/tools/objtool/special.c b/tools/objtool/special.c index e2223dd91c379..9c8d827f69afb 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -87,7 +87,8 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, if (entry->feature) { unsigned short feature; - feature = bswap_if_needed(*(unsigned short *)(sec->data->d_buf + + feature = bswap_if_needed(elf, + *(unsigned short *)(sec->data->d_buf + offset + entry->feature)); arch_handle_alternative(feature, alt); -- GitLab From 86ea7f361537f825a699e86fdc9e49be19f128d1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 14 Nov 2022 23:27:48 +0530 Subject: [PATCH 257/694] objtool: Use target file class size instead of a compiled constant In order to allow using objtool on cross-built kernels, determine size of long from elf data instead of using sizeof(long) at build time. 
For the time being this covers only mcount. [Sathvika Vasireddy: Rename variable "size" to "addrsize" and function "elf_class_size()" to "elf_class_addrsize()", and modify create_mcount_loc_sections() function to follow reverse christmas tree format to order local variable declarations.] Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Acked-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Signed-off-by: Christophe Leroy Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-11-sv@linux.ibm.com --- tools/objtool/check.c | 18 ++++++++++-------- tools/objtool/elf.c | 8 ++++++-- tools/objtool/include/objtool/elf.h | 8 ++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ad5dab1757014..b64518c7c7b48 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -852,9 +852,9 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) static int create_mcount_loc_sections(struct objtool_file *file) { - struct section *sec; - unsigned long *loc; + int addrsize = elf_class_addrsize(file->elf); struct instruction *insn; + struct section *sec; int idx; sec = find_section_by_name(file->elf, "__mcount_loc"); @@ -871,23 +871,25 @@ static int create_mcount_loc_sections(struct objtool_file *file) list_for_each_entry(insn, &file->mcount_loc_list, call_node) idx++; - sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx); + sec = elf_create_section(file->elf, "__mcount_loc", 0, addrsize, idx); if (!sec) return -1; + sec->sh.sh_addralign = addrsize; + idx = 0; list_for_each_entry(insn, &file->mcount_loc_list, call_node) { + void *loc; - loc = (unsigned long *)sec->data->d_buf + idx; - memset(loc, 0, sizeof(unsigned long)); + loc = sec->data->d_buf + idx; + memset(loc, 0, addrsize); - if (elf_add_reloc_to_insn(file->elf, sec, - idx * sizeof(unsigned long), + if (elf_add_reloc_to_insn(file->elf, sec, idx, R_X86_64_64, insn->sec, insn->offset)) return -1; - idx++; + idx += addrsize; } return 0; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 7e24b09b1163a..33739865735b5 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -1129,6 +1129,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec { char *relocname; struct section *sec; + int addrsize = elf_class_addrsize(elf); relocname = malloc(strlen(base->name) + strlen(".rela") + 1); if (!relocname) { @@ -1138,7 +1139,10 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec strcpy(relocname, ".rela"); strcat(relocname, base->name); - sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); + if (addrsize == sizeof(u32)) + sec = elf_create_section(elf, relocname, 0, sizeof(Elf32_Rela), 0); + else + sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0); free(relocname); if (!sec) return NULL; @@ -1147,7 +1151,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec sec->base = base; sec->sh.sh_type = SHT_RELA; - sec->sh.sh_addralign = 8; + sec->sh.sh_addralign = addrsize; sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx; sec->sh.sh_info = base->idx; sec->sh.sh_flags = SHF_INFO_LINK; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 16f4067b82aea..78b3aa2e546d5 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -142,6 +142,14 @@ static inline bool 
has_multiple_files(struct elf *elf) return elf->num_files > 1; } +static inline int elf_class_addrsize(struct elf *elf) +{ + if (elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32) + return sizeof(u32); + else + return sizeof(u64); +} + struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); -- GitLab From 280981d6994e0700abd36647b141e73059851e66 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:49 +0530 Subject: [PATCH 258/694] objtool: Add --mnop as an option to --mcount Some architectures (powerpc) may not support ftrace locations being nop'ed out at build time. Introduce CONFIG_HAVE_OBJTOOL_NOP_MCOUNT for objtool, as a means for architectures to enable nop'ing of ftrace locations. Add --mnop as an option to objtool --mcount, to indicate support for the same. Also, make sure that --mnop can be passed as an option to objtool only when --mcount is passed. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Acked-by: Josh Poimboeuf Reviewed-by: Christophe Leroy Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-12-sv@linux.ibm.com --- Makefile | 4 +++- arch/x86/Kconfig | 1 + kernel/trace/Kconfig | 7 +++++++ scripts/Makefile.lib | 3 +++ tools/objtool/builtin-check.c | 14 ++++++++++++++ tools/objtool/check.c | 19 ++++++++++--------- tools/objtool/include/objtool/builtin.h | 1 + 7 files changed, 39 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index d148a55bfd0f5..53c2b715d0bf0 100644 --- a/Makefile +++ b/Makefile @@ -933,7 +933,9 @@ ifdef CONFIG_FTRACE_MCOUNT_USE_CC endif endif ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL - CC_FLAGS_USING += -DCC_USING_NOP_MCOUNT + ifdef CONFIG_HAVE_OBJTOOL_NOP_MCOUNT + CC_FLAGS_USING += -DCC_USING_NOP_MCOUNT + endif endif ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT ifdef CONFIG_HAVE_C_RECORDMCOUNT diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 67745ceab0dbc..4be7c06a5d189 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -195,6 +195,7 @@ config X86 select HAVE_CONTEXT_TRACKING_USER_OFFSTACK if HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL + select HAVE_OBJTOOL_NOP_MCOUNT if HAVE_OBJTOOL_MCOUNT select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e9e95c790b8ee..2b782321376ac 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -82,6 +82,13 @@ config HAVE_OBJTOOL_MCOUNT help Arch supports objtool --mcount +config HAVE_OBJTOOL_NOP_MCOUNT + bool + help + Arch supports the objtool options --mcount with --mnop. + An architecture can select this if it wants to enable nop'ing + of ftrace locations. 
+ config HAVE_C_RECORDMCOUNT bool help diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 3aa384cec76b8..658f541c27827 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -256,6 +256,9 @@ objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr objtool-args-$(CONFIG_X86_KERNEL_IBT) += --ibt objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL) += --mcount +ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL +objtool-args-$(CONFIG_HAVE_OBJTOOL_NOP_MCOUNT) += --mnop +endif objtool-args-$(CONFIG_UNWINDER_ORC) += --orc objtool-args-$(CONFIG_RETPOLINE) += --retpoline objtool-args-$(CONFIG_RETHUNK) += --rethunk diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 24fbe803a0d32..9bd347d3c2449 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -82,6 +82,7 @@ const struct option check_options[] = { OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"), OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"), OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"), + OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"), OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"), OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"), OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"), @@ -150,6 +151,16 @@ static bool opts_valid(void) return false; } +static bool mnop_opts_valid(void) +{ + if (opts.mnop && !opts.mcount) { + ERROR("--mnop requires --mcount"); + return false; + } + + return true; +} + static bool link_opts_valid(struct objtool_file *file) { if (opts.link) @@ -198,6 +209,9 @@ int objtool_run(int argc, const char **argv) if (!file) return 1; + if (!mnop_opts_valid()) + return 1; + if (!link_opts_valid(file)) return 1; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b64518c7c7b48..71cf4b4ba1da2 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1256,17 +1256,18 @@ static void annotate_call_site(struct objtool_file *file, if (opts.mcount && sym->fentry) { if (sibling) WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); + if (opts.mnop) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); + } - if (reloc) { - reloc->type = R_NONE; - elf_write_reloc(file->elf, reloc); - } - - elf_write_insn(file->elf, insn->sec, - insn->offset, insn->len, - arch_nop_insn(insn->len)); + elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + arch_nop_insn(insn->len)); - insn->type = INSN_NOP; + insn->type = INSN_NOP; + } list_add_tail(&insn->call_node, &file->mcount_loc_list); return; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 42a52f1a0add5..0785707c5a924 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -31,6 +31,7 @@ struct opts { bool backup; bool dryrun; bool link; + bool mnop; bool module; bool no_unreachable; bool sec_address; -- GitLab From de6fbcedf5abce4c321eeb15d7d286b79804b8b6 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:50 +0530 Subject: [PATCH 259/694] objtool: Read special sections with alts only when specific options are selected Call add_special_section_alts() only when stackval or orc or uaccess or noinstr options are passed to objtool. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-13-sv@linux.ibm.com --- tools/objtool/check.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 71cf4b4ba1da2..752a6ffd5c4c9 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2392,9 +2392,11 @@ static int decode_sections(struct objtool_file *file) * Must be before add_jump_destinations(), which depends on 'func' * being set for alternatives, to enable proper sibling call detection. */ - ret = add_special_section_alts(file); - if (ret) - return ret; + if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) { + ret = add_special_section_alts(file); + if (ret) + return ret; + } ret = add_jump_destinations(file); if (ret) -- GitLab From c1449735211dd8c4c2d54fa0ece6890ecbd74e24 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:51 +0530 Subject: [PATCH 260/694] objtool: Use macros to define arch specific reloc types Make relocation types architecture specific. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-14-sv@linux.ibm.com --- tools/objtool/arch/x86/include/arch/elf.h | 2 ++ tools/objtool/check.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/include/arch/elf.h b/tools/objtool/arch/x86/include/arch/elf.h index 69cc4264b28a8..ac14987cf6875 100644 --- a/tools/objtool/arch/x86/include/arch/elf.h +++ b/tools/objtool/arch/x86/include/arch/elf.h @@ -2,5 +2,7 @@ #define _OBJTOOL_ARCH_ELF #define R_NONE R_X86_64_NONE +#define R_ABS64 R_X86_64_64 +#define R_ABS32 R_X86_64_32 #endif /* _OBJTOOL_ARCH_ELF */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 752a6ffd5c4c9..2d7153b5d5d16 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -885,7 +885,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) memset(loc, 0, addrsize); if (elf_add_reloc_to_insn(file->elf, sec, idx, - R_X86_64_64, + addrsize == sizeof(u64) ? R_ABS64 : R_ABS32, insn->sec, insn->offset)) return -1; -- GitLab From 4ca993d498987332ceeedee5380101b84accaf35 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:52 +0530 Subject: [PATCH 261/694] objtool: Add arch specific function arch_ftrace_match() Add architecture specific function to look for relocation records pointing to architecture specific symbols. Suggested-by: Christophe Leroy Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-15-sv@linux.ibm.com --- tools/objtool/arch/x86/decode.c | 5 +++++ tools/objtool/check.c | 2 +- tools/objtool/include/objtool/arch.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 1c253b4b7ce00..af7ad09c926c4 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -23,6 +23,11 @@ #include #include +int arch_ftrace_match(char *name) +{ + return !strcmp(name, "__fentry__"); +} + static int is_x86_64(const struct elf *elf) { switch (elf->ehdr.e_machine) { diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2d7153b5d5d16..7580c66ca5c8e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2316,7 +2316,7 @@ static int classify_symbols(struct objtool_file *file) if (arch_is_rethunk(func)) func->return_thunk = true; - if (!strcmp(func->name, "__fentry__")) + if (arch_ftrace_match(func->name)) func->fentry = true; if (is_profiling_func(func->name)) diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index beb2f3aa94ffc..5149330f400f5 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -69,6 +69,8 @@ struct stack_op { struct instruction; +int arch_ftrace_match(char *name); + void arch_initial_func_cfi_state(struct cfi_init_state *state); int arch_decode_instruction(struct objtool_file *file, const struct section *sec, -- GitLab From e52ec98c5ab18c0710ea22bf52f45e60a725adaf Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:53 +0530 Subject: [PATCH 262/694] objtool/powerpc: Enable objtool to be built on ppc This patch adds [stub] implementations for required functions, inorder to enable objtool build on powerpc. [Christophe Leroy: powerpc: Add missing asm/asm.h for objtool, Use local variables for type and imm in arch_decode_instruction(), Adapt len for prefixed instructions.] Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-16-sv@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/asm.h | 7 ++ tools/objtool/arch/powerpc/Build | 2 + tools/objtool/arch/powerpc/decode.c | 85 +++++++++++++++++++ .../arch/powerpc/include/arch/cfi_regs.h | 11 +++ tools/objtool/arch/powerpc/include/arch/elf.h | 8 ++ .../arch/powerpc/include/arch/special.h | 21 +++++ tools/objtool/arch/powerpc/special.c | 19 +++++ 8 files changed, 154 insertions(+) create mode 100644 arch/powerpc/include/asm/asm.h create mode 100644 tools/objtool/arch/powerpc/Build create mode 100644 tools/objtool/arch/powerpc/decode.c create mode 100644 tools/objtool/arch/powerpc/include/arch/cfi_regs.h create mode 100644 tools/objtool/arch/powerpc/include/arch/elf.h create mode 100644 tools/objtool/arch/powerpc/include/arch/special.h create mode 100644 tools/objtool/arch/powerpc/special.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 699df27b0e2fc..12e6c16be54e8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -238,6 +238,7 @@ config PPC select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES + select HAVE_OBJTOOL if PPC32 || MPROFILE_KERNEL select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS diff --git a/arch/powerpc/include/asm/asm.h b/arch/powerpc/include/asm/asm.h new file mode 100644 index 0000000000000..86f46b604e9a6 --- /dev/null +++ b/arch/powerpc/include/asm/asm.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_ASM_H +#define _ASM_POWERPC_ASM_H + +#define _ASM_PTR " .long " + +#endif /* _ASM_POWERPC_ASM_H */ diff --git a/tools/objtool/arch/powerpc/Build b/tools/objtool/arch/powerpc/Build new file mode 100644 index 0000000000000..d24d5636a5b84 --- /dev/null +++ b/tools/objtool/arch/powerpc/Build @@ -0,0 +1,2 @@ +objtool-y += decode.o +objtool-y += special.o diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c new file mode 100644 index 0000000000000..dcd0975cad6ba --- /dev/null +++ b/tools/objtool/arch/powerpc/decode.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long arch_dest_reloc_offset(int addend) +{ + return addend; +} + +bool arch_callee_saved_reg(unsigned char reg) +{ + return false; +} + +int arch_decode_hint_reg(u8 sp_reg, int *base) +{ + exit(-1); +} + +const char *arch_nop_insn(int len) +{ + exit(-1); +} + +const char *arch_ret_insn(int len) +{ + exit(-1); +} + +int arch_decode_instruction(struct objtool_file *file, const struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, enum insn_type *type, + unsigned long *immediate, + struct list_head *ops_list) +{ + unsigned int opcode; + enum insn_type typ; + unsigned long imm; + u32 insn; + + insn = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset)); + opcode = insn >> 26; + typ = INSN_OTHER; + imm = 0; + + if (opcode == 1) + *len = 8; + else + *len = 4; + + *type = typ; + *immediate = imm; + + return 0; +} + +unsigned long arch_jump_destination(struct instruction *insn) +{ + return insn->offset + insn->immediate; +} + +void arch_initial_func_cfi_state(struct cfi_init_state *state) +{ + int i; + + for (i = 0; i < CFI_NUM_REGS; i++) { + state->regs[i].base = CFI_UNDEFINED; + 
state->regs[i].offset = 0; + } + + /* initial CFA (call frame address) */ + state->cfa.base = CFI_SP; + state->cfa.offset = 0; + + /* initial LR (return address) */ + state->regs[CFI_RA].base = CFI_CFA; + state->regs[CFI_RA].offset = 0; +} diff --git a/tools/objtool/arch/powerpc/include/arch/cfi_regs.h b/tools/objtool/arch/powerpc/include/arch/cfi_regs.h new file mode 100644 index 0000000000000..59638ebeafc82 --- /dev/null +++ b/tools/objtool/arch/powerpc/include/arch/cfi_regs.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _OBJTOOL_CFI_REGS_H +#define _OBJTOOL_CFI_REGS_H + +#define CFI_BP 1 +#define CFI_SP CFI_BP +#define CFI_RA 32 +#define CFI_NUM_REGS 33 + +#endif diff --git a/tools/objtool/arch/powerpc/include/arch/elf.h b/tools/objtool/arch/powerpc/include/arch/elf.h new file mode 100644 index 0000000000000..3c8ebb7d2a6bb --- /dev/null +++ b/tools/objtool/arch/powerpc/include/arch/elf.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _OBJTOOL_ARCH_ELF +#define _OBJTOOL_ARCH_ELF + +#define R_NONE R_PPC_NONE + +#endif /* _OBJTOOL_ARCH_ELF */ diff --git a/tools/objtool/arch/powerpc/include/arch/special.h b/tools/objtool/arch/powerpc/include/arch/special.h new file mode 100644 index 0000000000000..ffef9ada7133d --- /dev/null +++ b/tools/objtool/arch/powerpc/include/arch/special.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _PPC_ARCH_SPECIAL_H +#define _PPC_ARCH_SPECIAL_H + +#define EX_ENTRY_SIZE 8 +#define EX_ORIG_OFFSET 0 +#define EX_NEW_OFFSET 4 + +#define JUMP_ENTRY_SIZE 16 +#define JUMP_ORIG_OFFSET 0 +#define JUMP_NEW_OFFSET 4 +#define JUMP_KEY_OFFSET 8 + +#define ALT_ENTRY_SIZE 12 +#define ALT_ORIG_OFFSET 0 +#define ALT_NEW_OFFSET 4 +#define ALT_FEATURE_OFFSET 8 +#define ALT_ORIG_LEN_OFFSET 10 +#define ALT_NEW_LEN_OFFSET 11 + +#endif /* _PPC_ARCH_SPECIAL_H */ diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c new file mode 100644 index 0000000000000..d33868147196a --- /dev/null +++ b/tools/objtool/arch/powerpc/special.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include + + +bool arch_support_alt_relocation(struct special_alt *special_alt, + struct instruction *insn, + struct reloc *reloc) +{ + exit(-1); +} + +struct reloc *arch_find_switch_table(struct objtool_file *file, + struct instruction *insn) +{ + exit(-1); +} -- GitLab From c984aef8c8326035570ff6e01d0ff9e79a5dfa76 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Mon, 14 Nov 2022 23:27:54 +0530 Subject: [PATCH 263/694] objtool/powerpc: Add --mcount specific implementation This patch enables objtool --mcount on powerpc, and adds implementation specific to powerpc. Tested-by: Naveen N. Rao Reviewed-by: Naveen N. 
Rao Reviewed-by: Christophe Leroy Acked-by: Josh Poimboeuf Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114175754.1131267-17-sv@linux.ibm.com --- arch/powerpc/Kconfig | 1 + tools/objtool/arch/powerpc/decode.c | 16 ++++++++++++++++ tools/objtool/arch/powerpc/include/arch/elf.h | 2 ++ 3 files changed, 19 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 12e6c16be54e8..9c07068ba5e5d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -239,6 +239,7 @@ config PPC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES select HAVE_OBJTOOL if PPC32 || MPROFILE_KERNEL + select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c index dcd0975cad6ba..01cade98b49e8 100644 --- a/tools/objtool/arch/powerpc/decode.c +++ b/tools/objtool/arch/powerpc/decode.c @@ -9,6 +9,11 @@ #include #include +int arch_ftrace_match(char *name) +{ + return !strcmp(name, "_mcount"); +} + unsigned long arch_dest_reloc_offset(int addend) { return addend; @@ -50,6 +55,17 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec typ = INSN_OTHER; imm = 0; + switch (opcode) { + case 18: /* b[l][a] */ + if ((insn & 3) == 1) /* bl */ + typ = INSN_CALL; + + imm = insn & 0x3fffffc; + if (imm & 0x2000000) + imm -= 0x4000000; + break; + } + if (opcode == 1) *len = 8; else diff --git a/tools/objtool/arch/powerpc/include/arch/elf.h b/tools/objtool/arch/powerpc/include/arch/elf.h index 3c8ebb7d2a6bb..73f9ae172fe55 100644 --- a/tools/objtool/arch/powerpc/include/arch/elf.h +++ b/tools/objtool/arch/powerpc/include/arch/elf.h @@ -4,5 +4,7 @@ #define _OBJTOOL_ARCH_ELF #define R_NONE R_PPC_NONE +#define R_ABS64 R_PPC64_ADDR64 +#define R_ABS32 R_PPC_ADDR32 #endif /* _OBJTOOL_ARCH_ELF */ -- GitLab From 5a47cb4df38bee861de37c12aaa4ef5510dd533b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 17 Nov 2022 10:44:21 +0100 Subject: [PATCH 264/694] dt-bindings: arm-smmu: Add SM6350 GPU SMMUv2 SM6350 has a qcom,smmu-v2-style SMMU just for Adreno and friends. Document it. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221117094422.11000-2-konrad.dybcio@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 28f5720824cd3..b28c5c2b0ff23 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -94,6 +94,7 @@ properties: - qcom,sc7180-smmu-v2 - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 + - qcom,sm6350-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 - description: Qcom Adreno GPUs on Google Cheza platform @@ -346,6 +347,7 @@ allOf: compatible: contains: enum: + - qcom,sm6350-smmu-v2 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 then: -- GitLab From 3811a7283a0a07fa84ccde69b3d48115d34e79af Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 17 Nov 2022 10:44:22 +0100 Subject: [PATCH 265/694] iommu/arm-smmu-qcom: Add SM6350 SMMUv2 SM6350 uses a qcom,smmu-v2-style SMMU just for Adreno and friends. Add a compatible for it. 
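As a rough illustration of what the new compatible string buys (the names and data below are made up for the sketch, not the driver's real tables): matching the device tree compatible against a table entry is what lets the driver hand back per-SoC implementation data, analogous to the of_device_id entry this patch adds.

  #include <stdio.h>
  #include <string.h>

  struct soc_match {
          const char *compatible;
          const char *impl_data;   /* stands in for per-SoC quirk data */
  };

  static const struct soc_match table[] = {
          { "qcom,sm6350-smmu-v2",  "adreno smmu-v2 data" },
          { "qcom,sm6350-smmu-500", "smmu-500 data" },
          { NULL, NULL }
  };

  static const char *match(const char *compatible)
  {
          const struct soc_match *m;

          for (m = table; m->compatible; m++)
                  if (!strcmp(m->compatible, compatible))
                          return m->impl_data;
          return NULL;
  }

  int main(void)
  {
          const char *data = match("qcom,sm6350-smmu-v2");

          printf("%s\n", data ? data : "no match (before the patch)");
          return 0;
  }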
Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20221117094422.11000-3-konrad.dybcio@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index c94daf88c5051..91d404deb1155 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -509,6 +509,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6350-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, -- GitLab From 5f18e9f8868c6d4eae71678e7ebd4977b7d8c8cf Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 19 Sep 2022 10:56:37 -0500 Subject: [PATCH 266/694] iommu/amd: Fix ivrs_acpihid cmdline parsing code The second (UID) strcmp in acpi_dev_hid_uid_match considers "0" and "00" different, which can prevent device registration. Have the AMD IOMMU driver's ivrs_acpihid parsing code remove any leading zeroes to make the UID strcmp succeed. Now users can safely specify "AMDxxxxx:00" or "AMDxxxxx:0" and expect the same behaviour. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Signed-off-by: Kim Phillips Cc: stable@vger.kernel.org Cc: Suravee Suthikulpanit Cc: Joerg Roedel Link: https://lore.kernel.org/r/20220919155638.391481-1-kim.phillips@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1a2d425bf5687..d14da30b8706f 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3488,6 +3488,13 @@ static int __init parse_ivrs_acpihid(char *str) return 1; } + /* + * Ignore leading zeroes after ':', so e.g., AMDI0095:00 + * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match + */ + while (*uid == '0' && *(uid + 1)) + uid++; + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); -- GitLab From 1198d2316dc4265a97d0e8445a22c7a6d17580a4 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 19 Sep 2022 10:56:38 -0500 Subject: [PATCH 267/694] iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options Currently, these options cause the following libkmod error: libkmod: ERROR ../libkmod/libkmod-config.c:489 kcmdline_parse_result: \ Ignoring bad option on kernel command line while parsing module \ name: 'ivrs_xxxx[XX:XX' Fix by introducing a new parameter format for these options and throw a warning for the deprecated format. Users are still allowed to omit the PCI Segment if zero. Adding a Link: to the reason why we're modding the syntax parsing in the driver and not in libkmod. 
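To make the two accepted syntaxes concrete, here is a small stand-alone sketch (ordinary user-space C, not kernel code; the parse_option() helper and the test strings are made up for illustration) that applies the same sscanf() matching this patch introduces for ivrs_ioapic and ivrs_hpet, new format first, deprecated bracket format as a fallback:

#include <stdio.h>

static int parse_option(const char *str)
{
    unsigned int seg = 0, bus, dev, fn;
    int id;

    /* new format: ivrs_ioapic=<id>@[<seg>:]<bus>:<dev>.<fn> */
    if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
        sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
        goto found;

    /* deprecated format: ivrs_ioapic[<id>]=[<seg>:]<bus>:<dev>.<fn> */
    if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
        sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
        printf("deprecated format: ivrs_ioapic%s\n", str);
        goto found;
    }

    printf("invalid: ivrs_ioapic%s\n", str);
    return 1;

found:
    printf("id %d -> %04x:%02x:%02x.%x\n", id, seg, bus, dev, fn);
    return 0;
}

int main(void)
{
    parse_option("=10@0001:00:14.0");   /* new syntax, with PCI segment */
    parse_option("=10@00:14.0");        /* new syntax, segment omitted (0) */
    parse_option("[10]=00:14.0");       /* deprecated syntax, still accepted */
    return 0;
}

Because the PCI segment defaults to 0, both the two-part and three-part address forms resolve to the same device when the segment is zero, which is why the deprecated formats can keep working while only emitting a warning.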
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-modules/20200310082308.14318-2-lucas.demarchi@intel.com/ Reported-by: Kim Phillips Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Kim Phillips Link: https://lore.kernel.org/r/20220919155638.391481-2-kim.phillips@amd.com Signed-off-by: Joerg Roedel --- .../admin-guide/kernel-parameters.txt | 27 +++++-- drivers/iommu/amd/init.c | 79 +++++++++++++------ 2 files changed, 76 insertions(+), 30 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a465d5242774a..bb1c62314f9ec 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2300,7 +2300,13 @@ Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map IOAPIC-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_ioapic=10@0001:00:14.0 + + Deprecated formats: * To map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 @@ -2312,7 +2318,13 @@ Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map HPET-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_hpet=10@0001:00:14.0 + + Deprecated formats: * To map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 @@ -2323,15 +2335,20 @@ ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. For example, to map UART-HID:UID AMD0020:0 to PCI segment 0x1 and PCI device ID 00:14.5, write the parameter as: - ivrs_acpihid[0001:00:14.5]=AMD0020:0 + ivrs_acpihid=AMD0020:0@0001:00:14.5 - By default, PCI segment is 0, and can be omitted. - For example, PCI device 00:14.5 write the parameter as: + Deprecated formats: + * To map UART-HID:UID AMD0020:0 to PCI segment is 0, + PCI device ID 00:14.5, write the parameter as: ivrs_acpihid[00:14.5]=AMD0020:0 + * To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and + PCI device ID 00:14.5, write the parameter as: + ivrs_acpihid[0001:00:14.5]=AMD0020:0 js= [HW,JOY] Analog joystick See Documentation/input/joydev/joystick.rst. 
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d14da30b8706f..34029d1161073 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3402,18 +3402,24 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_ivrs_ioapic(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_ioapic%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_ioapic%s\n", str); + return 1; + +found: if (early_ioapic_map_size == EARLY_MAP_SIZE) { pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", str); @@ -3434,18 +3440,24 @@ static int __init parse_ivrs_ioapic(char *str) static int __init parse_ivrs_hpet(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_hpet%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_hpet%s\n", str); + return 1; + +found: if (early_hpet_map_size == EARLY_MAP_SIZE) { pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", str); @@ -3466,19 +3478,36 @@ static int __init parse_ivrs_hpet(char *str) static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; - char *hid, *uid, *p; + char *hid, *uid, *p, *addr; char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; - int ret, i; - - ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); - if (ret != 4) { - ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid); - if (ret != 5) { - pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); - return 1; + int i; + + addr = strchr(str, '@'); + if (!addr) { + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || + sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { + pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", + str, acpiid, seg, bus, dev, fn); + goto found; } + goto not_found; } + /* We have the '@', make it the terminator to get just the acpiid */ + *addr++ = 0; + + if (sscanf(str, "=%s", acpiid) != 1) + goto not_found; + + if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || + sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) + goto found; + +not_found: + pr_err("Invalid command line: ivrs_acpihid%s\n", str); + return 1; + +found: p = acpiid; hid = strsep(&p, ":"); uid = p; -- 
GitLab From 73b6924cdebc899de9b719e1319aa86c6bed4acf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 29 Oct 2022 18:35:50 +0800 Subject: [PATCH 268/694] iommu/mediatek: Check return value after calling platform_get_resource() platform_get_resource() may return NULL pointer, we need check its return value to avoid null-ptr-deref in resource_size(). Fixes: 42d57fc58aeb ("iommu/mediatek: Initialise/Remove for multi bank dev") Signed-off-by: Yang Yingliang Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221029103550.3774365-1-yangyingliang@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2ab2ecfe01f80..2d14dc846b839 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1173,6 +1173,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) banks_num = data->plat_data->banks_num; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; if (resource_size(res) < banks_num * MTK_IOMMU_BANK_SZ) { dev_err(dev, "banknr %d. res %pR is not enough.\n", banks_num, res); return -EINVAL; -- GitLab From 59a316fdc4d564dc5e811321a8b20a444fc0094c Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Wed, 2 Nov 2022 16:18:07 +0100 Subject: [PATCH 269/694] dt-bindings: iommu: mediatek: add binding documentation for MT8365 SoC Add IOMMU binding documentation for the MT8365 SoC. Signed-off-by: Fabien Parent Signed-off-by: Markus Schneider-Pargmann Reviewed-by: AngeloGioacchino Del Regno Acked-by: Krzysztof Kozlowski Reviewed-by: Yong Wu Signed-off-by: Alexandre Mergnat Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221001-iommu-support-v6-1-be4fe8da254b@baylibre.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/mediatek,iommu.yaml | 2 + .../memory/mediatek,mt8365-larb-port.h | 90 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 include/dt-bindings/memory/mediatek,mt8365-larb-port.h diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index 839e3be0bf3ca..5b6395bc10e0c 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -82,6 +82,7 @@ properties: - mediatek,mt8195-iommu-vdo # generation two - mediatek,mt8195-iommu-vpp # generation two - mediatek,mt8195-iommu-infra # generation two + - mediatek,mt8365-m4u # generation two - description: mt7623 generation one items: @@ -132,6 +133,7 @@ properties: dt-binding/memory/mt8186-memory-port.h for mt8186, dt-binding/memory/mt8192-larb-port.h for mt8192. dt-binding/memory/mt8195-memory-port.h for mt8195. + dt-binding/memory/mediatek,mt8365-larb-port.h for mt8365. power-domains: maxItems: 1 diff --git a/include/dt-bindings/memory/mediatek,mt8365-larb-port.h b/include/dt-bindings/memory/mediatek,mt8365-larb-port.h new file mode 100644 index 0000000000000..56d5a5dd519e7 --- /dev/null +++ b/include/dt-bindings/memory/mediatek,mt8365-larb-port.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2022 MediaTek Inc. 
+ * Author: Yong Wu + */ +#ifndef _DT_BINDINGS_MEMORY_MT8365_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT8365_LARB_PORT_H_ + +#include + +#define M4U_LARB0_ID 0 +#define M4U_LARB1_ID 1 +#define M4U_LARB2_ID 2 +#define M4U_LARB3_ID 3 + +/* larb0 */ +#define M4U_PORT_DISP_OVL0 MTK_M4U_ID(M4U_LARB0_ID, 0) +#define M4U_PORT_DISP_OVL0_2L MTK_M4U_ID(M4U_LARB0_ID, 1) +#define M4U_PORT_DISP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 2) +#define M4U_PORT_DISP_WDMA0 MTK_M4U_ID(M4U_LARB0_ID, 3) +#define M4U_PORT_DISP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 4) +#define M4U_PORT_MDP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 5) +#define M4U_PORT_MDP_WROT1 MTK_M4U_ID(M4U_LARB0_ID, 6) +#define M4U_PORT_MDP_WROT0 MTK_M4U_ID(M4U_LARB0_ID, 7) +#define M4U_PORT_MDP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 8) +#define M4U_PORT_DISP_FAKE0 MTK_M4U_ID(M4U_LARB0_ID, 9) +#define M4U_PORT_APU_READ MTK_M4U_ID(M4U_LARB0_ID, 10) +#define M4U_PORT_APU_WRITE MTK_M4U_ID(M4U_LARB0_ID, 11) + +/* larb1 */ +#define M4U_PORT_VENC_RCPU MTK_M4U_ID(M4U_LARB1_ID, 0) +#define M4U_PORT_VENC_REC MTK_M4U_ID(M4U_LARB1_ID, 1) +#define M4U_PORT_VENC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 2) +#define M4U_PORT_VENC_SV_COMV MTK_M4U_ID(M4U_LARB1_ID, 3) +#define M4U_PORT_VENC_RD_COMV MTK_M4U_ID(M4U_LARB1_ID, 4) +#define M4U_PORT_VENC_NBM_RDMA MTK_M4U_ID(M4U_LARB1_ID, 5) +#define M4U_PORT_VENC_NBM_RDMA_LITE MTK_M4U_ID(M4U_LARB1_ID, 6) +#define M4U_PORT_JPGENC_Y_RDMA MTK_M4U_ID(M4U_LARB1_ID, 7) +#define M4U_PORT_JPGENC_C_RDMA MTK_M4U_ID(M4U_LARB1_ID, 8) +#define M4U_PORT_JPGENC_Q_TABLE MTK_M4U_ID(M4U_LARB1_ID, 9) +#define M4U_PORT_JPGENC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 10) +#define M4U_PORT_JPGDEC_WDMA MTK_M4U_ID(M4U_LARB1_ID, 11) +#define M4U_PORT_JPGDEC_BSDMA MTK_M4U_ID(M4U_LARB1_ID, 12) +#define M4U_PORT_VENC_NBM_WDMA MTK_M4U_ID(M4U_LARB1_ID, 13) +#define M4U_PORT_VENC_NBM_WDMA_LITE MTK_M4U_ID(M4U_LARB1_ID, 14) +#define M4U_PORT_VENC_CUR_LUMA MTK_M4U_ID(M4U_LARB1_ID, 15) +#define M4U_PORT_VENC_CUR_CHROMA MTK_M4U_ID(M4U_LARB1_ID, 16) +#define M4U_PORT_VENC_REF_LUMA MTK_M4U_ID(M4U_LARB1_ID, 17) +#define M4U_PORT_VENC_REF_CHROMA MTK_M4U_ID(M4U_LARB1_ID, 18) + +/* larb2 */ +#define M4U_PORT_CAM_IMGO MTK_M4U_ID(M4U_LARB2_ID, 0) +#define M4U_PORT_CAM_RRZO MTK_M4U_ID(M4U_LARB2_ID, 1) +#define M4U_PORT_CAM_AAO MTK_M4U_ID(M4U_LARB2_ID, 2) +#define M4U_PORT_CAM_LCS MTK_M4U_ID(M4U_LARB2_ID, 3) +#define M4U_PORT_CAM_ESFKO MTK_M4U_ID(M4U_LARB2_ID, 4) +#define M4U_PORT_CAM_CAM_SV0 MTK_M4U_ID(M4U_LARB2_ID, 5) +#define M4U_PORT_CAM_CAM_SV1 MTK_M4U_ID(M4U_LARB2_ID, 6) +#define M4U_PORT_CAM_LSCI MTK_M4U_ID(M4U_LARB2_ID, 7) +#define M4U_PORT_CAM_LSCI_D MTK_M4U_ID(M4U_LARB2_ID, 8) +#define M4U_PORT_CAM_AFO MTK_M4U_ID(M4U_LARB2_ID, 9) +#define M4U_PORT_CAM_SPARE MTK_M4U_ID(M4U_LARB2_ID, 10) +#define M4U_PORT_CAM_BPCI MTK_M4U_ID(M4U_LARB2_ID, 11) +#define M4U_PORT_CAM_BPCI_D MTK_M4U_ID(M4U_LARB2_ID, 12) +#define M4U_PORT_CAM_UFDI MTK_M4U_ID(M4U_LARB2_ID, 13) +#define M4U_PORT_CAM_IMGI MTK_M4U_ID(M4U_LARB2_ID, 14) +#define M4U_PORT_CAM_IMG2O MTK_M4U_ID(M4U_LARB2_ID, 15) +#define M4U_PORT_CAM_IMG3O MTK_M4U_ID(M4U_LARB2_ID, 16) +#define M4U_PORT_CAM_WPE0_I MTK_M4U_ID(M4U_LARB2_ID, 17) +#define M4U_PORT_CAM_WPE1_I MTK_M4U_ID(M4U_LARB2_ID, 18) +#define M4U_PORT_CAM_WPE_O MTK_M4U_ID(M4U_LARB2_ID, 19) +#define M4U_PORT_CAM_FD0_I MTK_M4U_ID(M4U_LARB2_ID, 20) +#define M4U_PORT_CAM_FD1_I MTK_M4U_ID(M4U_LARB2_ID, 21) +#define M4U_PORT_CAM_FD0_O MTK_M4U_ID(M4U_LARB2_ID, 22) +#define M4U_PORT_CAM_FD1_O MTK_M4U_ID(M4U_LARB2_ID, 23) + +/* larb3 */ +#define M4U_PORT_HW_VDEC_MC_EXT MTK_M4U_ID(M4U_LARB3_ID, 0) +#define 
M4U_PORT_HW_VDEC_UFO_EXT MTK_M4U_ID(M4U_LARB3_ID, 1) +#define M4U_PORT_HW_VDEC_PP_EXT MTK_M4U_ID(M4U_LARB3_ID, 2) +#define M4U_PORT_HW_VDEC_PRED_RD_EXT MTK_M4U_ID(M4U_LARB3_ID, 3) +#define M4U_PORT_HW_VDEC_PRED_WR_EXT MTK_M4U_ID(M4U_LARB3_ID, 4) +#define M4U_PORT_HW_VDEC_PPWRAP_EXT MTK_M4U_ID(M4U_LARB3_ID, 5) +#define M4U_PORT_HW_VDEC_TILE_EXT MTK_M4U_ID(M4U_LARB3_ID, 6) +#define M4U_PORT_HW_VDEC_VLD_EXT MTK_M4U_ID(M4U_LARB3_ID, 7) +#define M4U_PORT_HW_VDEC_VLD2_EXT MTK_M4U_ID(M4U_LARB3_ID, 8) +#define M4U_PORT_HW_VDEC_AVC_MV_EXT MTK_M4U_ID(M4U_LARB3_ID, 9) +#define M4U_PORT_HW_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(M4U_LARB3_ID, 10) + +#endif -- GitLab From 65df7d824f82f4dd3552b5a62ae8db07f25e423f Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Wed, 2 Nov 2022 16:18:08 +0100 Subject: [PATCH 270/694] iommu/mediatek: add support for 6-bit encoded port IDs Until now the port ID was always encoded as a 5-bit data. On MT8365, the port ID is encoded as a 6-bit data. This requires to add extra macro F_MMU_INT_ID_LARB_ID_EXT, and F_MMU_INT_ID_PORT_ID_EXT in order to support 6-bit encoded port IDs. Signed-off-by: Fabien Parent Signed-off-by: Markus Schneider-Pargmann Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Yong Wu Signed-off-by: Alexandre Mergnat Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221001-iommu-support-v6-2-be4fe8da254b@baylibre.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2d14dc846b839..885ba5233b99f 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -108,8 +108,12 @@ #define F_MMU_INT_ID_SUB_COMM_ID(a) (((a) >> 7) & 0x3) #define F_MMU_INT_ID_COMM_ID_EXT(a) (((a) >> 10) & 0x7) #define F_MMU_INT_ID_SUB_COMM_ID_EXT(a) (((a) >> 7) & 0x7) +/* Macro for 5 bits length port ID field (default) */ #define F_MMU_INT_ID_LARB_ID(a) (((a) >> 7) & 0x7) #define F_MMU_INT_ID_PORT_ID(a) (((a) >> 2) & 0x1f) +/* Macro for 6 bits length port ID field */ +#define F_MMU_INT_ID_LARB_ID_WID_6(a) (((a) >> 8) & 0x7) +#define F_MMU_INT_ID_PORT_ID_WID_6(a) (((a) >> 2) & 0x3f) #define MTK_PROTECT_PA_ALIGN 256 #define MTK_IOMMU_BANK_SZ 0x1000 @@ -139,6 +143,7 @@ #define IFA_IOMMU_PCIE_SUPPORT BIT(16) #define PGTABLE_PA_35_EN BIT(17) #define TF_PORT_TO_ADDR_MT8173 BIT(18) +#define INT_ID_PORT_WIDTH_6 BIT(19) #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -441,14 +446,19 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_pa |= (u64)pa34_32 << 32; if (MTK_IOMMU_IS_TYPE(plat_data, MTK_IOMMU_TYPE_MM)) { - fault_port = F_MMU_INT_ID_PORT_ID(regval); if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_2BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval); + fault_port = F_MMU_INT_ID_PORT_ID(regval); } else if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_3BITS)) { fault_larb = F_MMU_INT_ID_COMM_ID_EXT(regval); sub_comm = F_MMU_INT_ID_SUB_COMM_ID_EXT(regval); + fault_port = F_MMU_INT_ID_PORT_ID(regval); + } else if (MTK_IOMMU_HAS_FLAG(plat_data, INT_ID_PORT_WIDTH_6)) { + fault_port = F_MMU_INT_ID_PORT_ID_WID_6(regval); + fault_larb = F_MMU_INT_ID_LARB_ID_WID_6(regval); } else { + fault_port = F_MMU_INT_ID_PORT_ID(regval); fault_larb = F_MMU_INT_ID_LARB_ID(regval); } fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; -- GitLab From 3cd0e4a34d5a9bcff90e0c104800700346e42658 Mon Sep 17 00:00:00 2001 From: Fabien Parent 
Date: Wed, 2 Nov 2022 16:18:09 +0100 Subject: [PATCH 271/694] iommu/mediatek: add support for MT8365 SoC Add IOMMU support for MT8365 SoC. Signed-off-by: Fabien Parent Reviewed-by: Amjad Ouled-Ameur Tested-by: Amjad Ouled-Ameur Signed-off-by: Markus Schneider-Pargmann Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Yong Wu Signed-off-by: Alexandre Mergnat Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221001-iommu-support-v6-3-be4fe8da254b@baylibre.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 885ba5233b99f..c80f33dd2d434 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -170,6 +170,7 @@ enum mtk_iommu_plat { M4U_MT8186, M4U_MT8192, M4U_MT8195, + M4U_MT8365, }; struct mtk_iommu_iova_region { @@ -1528,6 +1529,17 @@ static const struct mtk_iommu_plat_data mt8195_data_vpp = { {4, MTK_INVALID_LARBID, MTK_INVALID_LARBID, MTK_INVALID_LARBID, 6}}, }; +static const struct mtk_iommu_plat_data mt8365_data = { + .m4u_plat = M4U_MT8365, + .flags = RESET_AXI | INT_ID_PORT_WIDTH_6, + .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), + .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */ +}; + static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, @@ -1540,6 +1552,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra}, { .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo}, { .compatible = "mediatek,mt8195-iommu-vpp", .data = &mt8195_data_vpp}, + { .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data}, {} }; -- GitLab From 01657bc14a3990c665375f77978631fee77b1fce Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Nov 2022 19:51:43 +0000 Subject: [PATCH 272/694] iommu: Avoid races around device probe We currently have 3 different ways that __iommu_probe_device() may be called, but no real guarantee that multiple callers can't tread on each other, especially once asynchronous driver probe gets involved. It would likely have taken a fair bit of luck to hit this previously, but commit 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") ups the odds since now it's not just omap-iommu that may trigger multiple bus_iommu_probe() calls in parallel if probing asynchronously. Add a lock to ensure we can't try to double-probe a device, and also close some possible race windows to make sure we're truly robust against trying to double-initialise a group via two different member devices. 
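The effect of the added lock can be illustrated outside the kernel. The sketch below is plain C with pthreads (the fake_dev and fake_probe_device names are invented; build with -pthread); it only shows the basic idea that a single static mutex lets exactly one of two racing probe paths perform the one-time initialisation:

#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>

struct fake_dev {
    bool probed;
    int init_count;
};

static pthread_mutex_t probe_lock = PTHREAD_MUTEX_INITIALIZER;

static int fake_probe_device(struct fake_dev *dev)
{
    int ret = 0;

    pthread_mutex_lock(&probe_lock);
    if (dev->probed)            /* someone else won the race */
        goto out_unlock;

    dev->init_count++;          /* expensive one-time setup goes here */
    dev->probed = true;

out_unlock:
    pthread_mutex_unlock(&probe_lock);
    return ret;
}

static void *probe_thread(void *arg)
{
    fake_probe_device(arg);
    return NULL;
}

int main(void)
{
    struct fake_dev dev = { 0 };
    pthread_t t1, t2;

    pthread_create(&t1, NULL, probe_thread, &dev);
    pthread_create(&t2, NULL, probe_thread, &dev);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);

    printf("initialised %d time(s)\n", dev.init_count);  /* always 1 */
    return 0;
}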
Reported-by: Brian Norris Signed-off-by: Robin Murphy Tested-by: Brian Norris Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Link: https://lore.kernel.org/r/1946ef9f774851732eed78760a78ec40dbc6d178.1667591503.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6ca377f4fbf9e..7c99d8eb3182a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -306,13 +306,23 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_device *iommu_dev; struct iommu_group *group; + static DEFINE_MUTEX(iommu_probe_device_lock); int ret; if (!ops) return -ENODEV; - - if (!dev_iommu_get(dev)) - return -ENOMEM; + /* + * Serialise to avoid races between IOMMU drivers registering in + * parallel and/or the "replay" calls from ACPI/OF code via client + * driver probe. Once the latter have been cleaned up we should + * probably be able to use device_lock() here to minimise the scope, + * but for now enforcing a simple global ordering is fine. + */ + mutex_lock(&iommu_probe_device_lock); + if (!dev_iommu_get(dev)) { + ret = -ENOMEM; + goto err_unlock; + } if (!try_module_get(ops->owner)) { ret = -EINVAL; @@ -333,11 +343,14 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list ret = PTR_ERR(group); goto out_release; } - iommu_group_put(group); + mutex_lock(&group->mutex); if (group_list && !group->default_domain && list_empty(&group->entry)) list_add_tail(&group->entry, group_list); + mutex_unlock(&group->mutex); + iommu_group_put(group); + mutex_unlock(&iommu_probe_device_lock); iommu_device_link(iommu_dev, dev); return 0; @@ -352,6 +365,9 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list err_free: dev_iommu_free(dev); +err_unlock: + mutex_unlock(&iommu_probe_device_lock); + return ret; } @@ -1824,11 +1840,11 @@ int bus_iommu_probe(struct bus_type *bus) return ret; list_for_each_entry_safe(group, next, &group_list, entry) { + mutex_lock(&group->mutex); + /* Remove item from the list */ list_del_init(&group->entry); - mutex_lock(&group->mutex); - /* Try to allocate default domain */ probe_alloc_default_domain(bus, group); -- GitLab From 59bbf596791b89c7f88fdcac29dfc39c1221d25d Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:28:59 +0100 Subject: [PATCH 273/694] iommu/s390: Make attach succeed even if the device is in error state If a zPCI device is in the error state while switching IOMMU domains zpci_register_ioat() will fail and we would end up with the device not attached to any domain. In this state since zdev->dma_table == NULL a reset via zpci_hot_reset_device() would wrongfully re-initialize the device for DMA API usage using zpci_dma_init_device(). As automatic recovery is currently disabled while attached to an IOMMU domain this only affects slot resets triggered through other means but will affect automatic recovery once we switch to using dma-iommu. Additionally with that switch common code expects attaching to the default domain to always work so zpci_register_ioat() should only fail if there is no chance to recover anyway, e.g. if the device has been unplugged. 
Improve the robustness of attach by specifically looking at the status returned by zpci_mod_fc() to determine if the device is unavailable and in this case simply ignore the error. Once the device is reset zpci_hot_reset_device() will then correctly set the domain's DMA translation tables. Signed-off-by: Niklas Schnelle Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221109142903.4080275-2-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 2 +- arch/s390/kvm/pci.c | 6 ++++-- arch/s390/pci/pci.c | 11 ++++++----- arch/s390/pci/pci_dma.c | 3 ++- drivers/iommu/s390-iommu.c | 9 +++++++-- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 15f8714ca9b7c..07361e2fd8c51 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -221,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev); bool zpci_is_device_configured(struct zpci_dev *zdev); int zpci_hot_reset_device(struct zpci_dev *zdev); -int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); +int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *); int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); void zpci_update_fh(struct zpci_dev *zdev, u32 fh); diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index c50c1645c0aec..03964c0e1fdf3 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { struct zpci_dev *zdev = opaque; + u8 status; int rc; if (!zdev) @@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) /* Re-register the IOMMU that was already created */ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); if (rc) goto clear_gisa; @@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) { struct zpci_dev *zdev = opaque; struct kvm *kvm; + u8 status; if (!zdev) return; @@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) /* Re-register the IOMMU that was already created */ zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); out: spin_lock(&kvm->arch.kzdev_list_lock); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 73cdc55393847..a703dcd94a689 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain); /* Modify PCI: Register I/O address translation parameters */ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, - u64 base, u64 limit, u64 iota) + u64 base, u64 limit, u64 iota, u8 *status) { u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT); struct zpci_fib fib = {0}; - u8 cc, status; + u8 cc; WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; fib.gd = zdev->gisa; - cc = zpci_mod_fc(req, &fib, &status); + cc = zpci_mod_fc(req, &fib, status); if (cc) - zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status); + zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status); return cc; } EXPORT_SYMBOL_GPL(zpci_register_ioat); @@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); */ int zpci_hot_reset_device(struct zpci_dev *zdev) { + u8 status; int rc; zpci_dbg(3, "rst fid:%x, 
fh:%x\n", zdev->fid, zdev->fh); @@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) if (zdev->dma_table) rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table)); + virt_to_phys(zdev->dma_table), &status); else rc = zpci_dma_init_device(zdev); if (rc) { diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 227cf0a62800b..dee825ee73052 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -547,6 +547,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int zpci_dma_init_device(struct zpci_dev *zdev) { + u8 status; int rc; /* @@ -598,7 +599,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) } if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table))) { + virt_to_phys(zdev->dma_table), &status)) { rc = -EIO; goto free_bitmap; } diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 7fb512bece9a8..e2c886bc43761 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -98,6 +98,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev = to_zpci_dev(dev); unsigned long flags; + u8 status; int cc; if (!zdev) @@ -113,8 +114,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, zpci_dma_exit_device(zdev); cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(s390_domain->dma_table)); - if (cc) + virt_to_phys(s390_domain->dma_table), &status); + /* + * If the device is undergoing error recovery the reset code + * will re-establish the new domain. + */ + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) return -EIO; zdev->dma_table = s390_domain->dma_table; -- GitLab From c228f5a043370ef02867e4f0aab1bdc8422500e6 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:29:00 +0100 Subject: [PATCH 274/694] iommu/s390: Add I/O TLB ops Currently s390-iommu does an I/O TLB flush (RPCIT) for every update of the I/O translation table explicitly. For one this is wasteful since RPCIT can be skipped after a mapping operation if zdev->tlb_refresh is unset. Moreover we can do a single RPCIT for a range of pages including whne doing lazy unmapping. Thankfully both of these optimizations can be achieved by implementing the IOMMU operations common code provides for the different types of I/O tlb flushes: * flush_iotlb_all: Flushes the I/O TLB for the entire IOVA space * iotlb_sync: Flushes the I/O TLB for a range of pages that can be gathered up, for example to implement lazy unmapping. 
* iotlb_sync_map: Flushes the I/O TLB after a mapping operation Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221109142903.4080275-3-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 67 +++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index e2c886bc43761..9771bce86e94c 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -199,14 +199,63 @@ static void s390_iommu_release_device(struct device *dev) __s390_iommu_detach_device(zdev); } +static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev; + unsigned long flags; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); +} + +static void s390_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + size_t size = gather->end - gather->start + 1; + struct zpci_dev *zdev; + unsigned long flags; + + /* If gather was never added to there is nothing to flush */ + if (!gather->end) + return; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + zpci_refresh_trans((u64)zdev->fh << 32, gather->start, + size); + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); +} + +static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev; + unsigned long flags; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + if (!zdev->tlb_refresh) + continue; + zpci_refresh_trans((u64)zdev->fh << 32, + iova, size); + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); +} + static int s390_iommu_update_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, unsigned long nr_pages, int flags) { phys_addr_t page_addr = pa & PAGE_MASK; - dma_addr_t start_dma_addr = dma_addr; unsigned long irq_flags, i; - struct zpci_dev *zdev; unsigned long *entry; int rc = 0; @@ -225,15 +274,6 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, dma_addr += PAGE_SIZE; } - spin_lock(&s390_domain->list_lock); - list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { - rc = zpci_refresh_trans((u64)zdev->fh << 32, - start_dma_addr, nr_pages * PAGE_SIZE); - if (rc) - break; - } - spin_unlock(&s390_domain->list_lock); - undo_cpu_trans: if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { flags = ZPCI_PTE_INVALID; @@ -340,6 +380,8 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, if (rc) return 0; + iommu_iotlb_gather_add_range(gather, iova, size); + return size; } @@ -384,6 +426,9 @@ static const struct iommu_ops s390_iommu_ops = { .detach_dev = s390_iommu_detach_device, .map_pages = s390_iommu_map_pages, .unmap_pages = s390_iommu_unmap_pages, + .flush_iotlb_all = s390_iommu_flush_iotlb_all, + .iotlb_sync = s390_iommu_iotlb_sync, + .iotlb_sync_map = s390_iommu_iotlb_sync_map, .iova_to_phys = s390_iommu_iova_to_phys, .free = s390_domain_free, } -- GitLab From 
2ba8336dab5fb81452aea9c21dfc870050a017f3 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:29:01 +0100 Subject: [PATCH 275/694] iommu/s390: Use RCU to allow concurrent domain_list iteration The s390_domain->devices list is only added to when new devices are attached but is iterated through in read-only fashion for every mapping operation as well as for I/O TLB flushes and thus in performance critical code causing contention on the s390_domain->list_lock. Fortunately such a read-mostly linked list is a standard use case for RCU. This change closely follows the example fpr RCU protected list given in Documentation/RCU/listRCU.rst. Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221109142903.4080275-4-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci.c | 2 +- drivers/iommu/s390-iommu.c | 44 +++++++++++++++++++++++-------------- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 07361e2fd8c51..e4c3e4e04d306 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -119,6 +119,7 @@ struct zpci_dev { struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ struct list_head iommu_list; struct kref kref; + struct rcu_head rcu; struct hotplug_slot hotplug_slot; enum zpci_state state; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index a703dcd94a689..ef38b1514c77a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -996,7 +996,7 @@ void zpci_release_device(struct kref *kref) break; } zpci_dbg(3, "rem fid:%x\n", zdev->fid); - kfree(zdev); + kfree_rcu(zdev, rcu); } int zpci_report_error(struct pci_dev *pdev, diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 9771bce86e94c..cf5dcbcea4e03 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include static const struct iommu_ops s390_iommu_ops; @@ -20,6 +22,7 @@ struct s390_domain { unsigned long *dma_table; spinlock_t dma_table_lock; spinlock_t list_lock; + struct rcu_head rcu; }; static struct s390_domain *to_s390_domain(struct iommu_domain *dom) @@ -61,18 +64,28 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) spin_lock_init(&s390_domain->dma_table_lock); spin_lock_init(&s390_domain->list_lock); - INIT_LIST_HEAD(&s390_domain->devices); + INIT_LIST_HEAD_RCU(&s390_domain->devices); return &s390_domain->domain; } +static void s390_iommu_rcu_free_domain(struct rcu_head *head) +{ + struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu); + + dma_cleanup_tables(s390_domain->dma_table); + kfree(s390_domain); +} + static void s390_domain_free(struct iommu_domain *domain) { struct s390_domain *s390_domain = to_s390_domain(domain); + rcu_read_lock(); WARN_ON(!list_empty(&s390_domain->devices)); - dma_cleanup_tables(s390_domain->dma_table); - kfree(s390_domain); + rcu_read_unlock(); + + call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain); } static void __s390_iommu_detach_device(struct zpci_dev *zdev) @@ -84,7 +97,7 @@ static void __s390_iommu_detach_device(struct zpci_dev *zdev) return; spin_lock_irqsave(&s390_domain->list_lock, flags); - list_del_init(&zdev->iommu_list); + list_del_rcu(&zdev->iommu_list); spin_unlock_irqrestore(&s390_domain->list_lock, flags); zpci_unregister_ioat(zdev, 0); @@ -127,7 +140,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, 
zdev->s390_domain = s390_domain; spin_lock_irqsave(&s390_domain->list_lock, flags); - list_add(&zdev->iommu_list, &s390_domain->devices); + list_add_rcu(&zdev->iommu_list, &s390_domain->devices); spin_unlock_irqrestore(&s390_domain->list_lock, flags); return 0; @@ -203,14 +216,13 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev; - unsigned long flags; - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, zdev->end_dma - zdev->start_dma + 1); } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); + rcu_read_unlock(); } static void s390_iommu_iotlb_sync(struct iommu_domain *domain, @@ -219,18 +231,17 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain, struct s390_domain *s390_domain = to_s390_domain(domain); size_t size = gather->end - gather->start + 1; struct zpci_dev *zdev; - unsigned long flags; /* If gather was never added to there is nothing to flush */ if (!gather->end) return; - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { zpci_refresh_trans((u64)zdev->fh << 32, gather->start, size); } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); + rcu_read_unlock(); } static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, @@ -238,16 +249,15 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev; - unsigned long flags; - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_for_each_entry(zdev, &s390_domain->devices, iommu_list) { + rcu_read_lock(); + list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { if (!zdev->tlb_refresh) continue; zpci_refresh_trans((u64)zdev->fh << 32, iova, size); } - spin_unlock_irqrestore(&s390_domain->list_lock, flags); + rcu_read_unlock(); } static int s390_iommu_update_trans(struct s390_domain *s390_domain, -- GitLab From 08955af0600303455f57fe2f2a26f24f9b496b49 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:29:02 +0100 Subject: [PATCH 276/694] iommu/s390: Optimize IOMMU table walking When invalidating existing table entries for unmap there is no need to know the physical address beforehand so don't do an extra walk of the IOMMU table to get it. Also when invalidating entries not finding an entry indicates an invalid unmap and not a lack of memory we also don't need to undo updates in this case. Implement this by splitting s390_iommu_update_trans() in a variant for validating and one for invalidating translations. 
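The shape of that split can be shown with a small self-contained model (plain C; the flat table[] and the function names are invented stand-ins for the real multi-level I/O translation table): the validate path may fail part-way and must undo its partial work, while the invalidate path never needs the physical address and never rolls back.

#include <stdio.h>
#include <stdint.h>

#define NR_ENTRIES  16
#define PTE_VALID   0x1ULL

static uint64_t table[NR_ENTRIES];  /* 0 means invalid */

static int validate_range(unsigned long idx, uint64_t pa, unsigned long nr)
{
    unsigned long i;

    for (i = 0; i < nr; i++) {
        if (idx + i >= NR_ENTRIES)
            goto undo;              /* walk/allocation failed: roll back */
        table[idx + i] = (pa + i * 4096) | PTE_VALID;
    }
    return 0;

undo:
    while (i-- > 0)
        table[idx + i] = 0;
    return -1;
}

static int invalidate_range(unsigned long idx, unsigned long nr)
{
    unsigned long i;
    int rc = 0;

    for (i = 0; i < nr; i++) {
        if (idx + i >= NR_ENTRIES) {    /* invalid unmap, nothing to undo */
            rc = -1;
            break;
        }
        table[idx + i] = 0;
    }
    return rc;
}

int main(void)
{
    printf("map: %d\n", validate_range(2, 0x100000, 4));   /* 0 */
    printf("unmap: %d\n", invalidate_range(2, 4));         /* 0 */
    printf("bad map: %d\n", validate_range(14, 0x0, 4));   /* -1, rolled back */
    return 0;
}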
Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221109142903.4080275-5-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 69 ++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index cf5dcbcea4e03..2b9a3e3bc6066 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -260,14 +260,14 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, rcu_read_unlock(); } -static int s390_iommu_update_trans(struct s390_domain *s390_domain, - phys_addr_t pa, dma_addr_t dma_addr, - unsigned long nr_pages, int flags) +static int s390_iommu_validate_trans(struct s390_domain *s390_domain, + phys_addr_t pa, dma_addr_t dma_addr, + unsigned long nr_pages, int flags) { phys_addr_t page_addr = pa & PAGE_MASK; unsigned long irq_flags, i; unsigned long *entry; - int rc = 0; + int rc; if (!nr_pages) return 0; @@ -275,7 +275,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); - if (!entry) { + if (unlikely(!entry)) { rc = -ENOMEM; goto undo_cpu_trans; } @@ -283,19 +283,43 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain, page_addr += PAGE_SIZE; dma_addr += PAGE_SIZE; } + spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); + + return 0; undo_cpu_trans: - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { - flags = ZPCI_PTE_INVALID; - while (i-- > 0) { - page_addr -= PAGE_SIZE; - dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(s390_domain->dma_table, - dma_addr); - if (!entry) - break; - dma_update_cpu_trans(entry, page_addr, flags); + while (i-- > 0) { + dma_addr -= PAGE_SIZE; + entry = dma_walk_cpu_trans(s390_domain->dma_table, + dma_addr); + if (!entry) + break; + dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); + } + spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); + + return rc; +} + +static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, + dma_addr_t dma_addr, unsigned long nr_pages) +{ + unsigned long irq_flags, i; + unsigned long *entry; + int rc = 0; + + if (!nr_pages) + return 0; + + spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); + for (i = 0; i < nr_pages; i++) { + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + if (unlikely(!entry)) { + rc = -EINVAL; + break; } + dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); + dma_addr += PAGE_SIZE; } spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); @@ -308,8 +332,8 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, int prot, gfp_t gfp, size_t *mapped) { struct s390_domain *s390_domain = to_s390_domain(domain); - int flags = ZPCI_PTE_VALID, rc = 0; size_t size = pgcount << __ffs(pgsize); + int flags = ZPCI_PTE_VALID, rc = 0; if (pgsize != SZ_4K) return -EINVAL; @@ -327,8 +351,8 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, if (!(prot & IOMMU_WRITE)) flags |= ZPCI_TABLE_PROTECTED; - rc = s390_iommu_update_trans(s390_domain, paddr, iova, - pgcount, flags); + rc = s390_iommu_validate_trans(s390_domain, paddr, iova, + pgcount, flags); if (!rc) *mapped = size; @@ -373,20 +397,13 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, { struct s390_domain *s390_domain = to_s390_domain(domain); size_t size = pgcount << __ffs(pgsize); - int flags = 
ZPCI_PTE_INVALID; - phys_addr_t paddr; int rc; if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start || (iova + size - 1) > s390_domain->domain.geometry.aperture_end)) return 0; - paddr = s390_iommu_iova_to_phys(domain, iova); - if (!paddr) - return 0; - - rc = s390_iommu_update_trans(s390_domain, paddr, iova, - pgcount, flags); + rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount); if (rc) return 0; -- GitLab From 21c1f9021f0e7d28c3edfcc70e1ca1926ea3774e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 9 Nov 2022 15:29:03 +0100 Subject: [PATCH 277/694] s390/pci: use lock-free I/O translation updates I/O translation tables on s390 use 8 byte page table entries and tables which are allocated lazily but only freed when the entire I/O translation table is torn down. Also each IOVA can at any time only translate to one physical address Furthermore I/O table accesses by the IOMMU hardware are cache coherent. With a bit of care we can thus use atomic updates to manipulate the translation table without having to use a global lock at all. This is done analogous to the existing I/O translation table handling code used on Intel and AMD x86 systems. Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20221109142903.4080275-6-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 1 - arch/s390/pci/pci_dma.c | 74 ++++++++++++++++++++++--------------- drivers/iommu/s390-iommu.c | 37 +++++++------------ 3 files changed, 58 insertions(+), 54 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index e4c3e4e04d306..b248694e00247 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -157,7 +157,6 @@ struct zpci_dev { /* DMA stuff */ unsigned long *dma_table; - spinlock_t dma_table_lock; int tlb_refresh; spinlock_t iommu_bitmap_lock; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index dee825ee73052..ea478d11fbd13 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -63,37 +63,55 @@ static void dma_free_page_table(void *table) kmem_cache_free(dma_page_table_cache, table); } -static unsigned long *dma_get_seg_table_origin(unsigned long *entry) +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) { + unsigned long old_rte, rte; unsigned long *sto; - if (reg_entry_isvalid(*entry)) - sto = get_rt_sto(*entry); - else { + rte = READ_ONCE(*rtep); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + } else { sto = dma_alloc_cpu_table(); if (!sto) return NULL; - set_rt_sto(entry, virt_to_phys(sto)); - validate_rt_entry(entry); - entry_clr_protected(entry); + set_rt_sto(&rte, virt_to_phys(sto)); + validate_rt_entry(&rte); + entry_clr_protected(&rte); + + old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); + if (old_rte != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_cpu_table(sto); + sto = get_rt_sto(old_rte); + } } return sto; } -static unsigned long *dma_get_page_table_origin(unsigned long *entry) +static unsigned long *dma_get_page_table_origin(unsigned long *step) { + unsigned long old_ste, ste; unsigned long *pto; - if (reg_entry_isvalid(*entry)) - pto = get_st_pto(*entry); - else { + ste = READ_ONCE(*step); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + } else { pto = dma_alloc_page_table(); if (!pto) return NULL; - set_st_pto(entry, virt_to_phys(pto)); - validate_st_entry(entry); - entry_clr_protected(entry); + set_st_pto(&ste, virt_to_phys(pto)); + validate_st_entry(&ste); + 
entry_clr_protected(&ste); + + old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); + if (old_ste != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_page_table(pto); + pto = get_st_pto(old_ste); + } } return pto; } @@ -117,19 +135,24 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) return &pto[px]; } -void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags) +void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) { + unsigned long pte; + + pte = READ_ONCE(*ptep); if (flags & ZPCI_PTE_INVALID) { - invalidate_pt_entry(entry); + invalidate_pt_entry(&pte); } else { - set_pt_pfaa(entry, page_addr); - validate_pt_entry(entry); + set_pt_pfaa(&pte, page_addr); + validate_pt_entry(&pte); } if (flags & ZPCI_TABLE_PROTECTED) - entry_set_protected(entry); + entry_set_protected(&pte); else - entry_clr_protected(entry); + entry_clr_protected(&pte); + + xchg(ptep, pte); } static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, @@ -137,18 +160,14 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, { unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; phys_addr_t page_addr = (pa & PAGE_MASK); - unsigned long irq_flags; unsigned long *entry; int i, rc = 0; if (!nr_pages) return -EINVAL; - spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); - if (!zdev->dma_table) { - rc = -EINVAL; - goto out_unlock; - } + if (!zdev->dma_table) + return -EINVAL; for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); @@ -173,8 +192,6 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, dma_update_cpu_trans(entry, page_addr, flags); } } -out_unlock: - spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); return rc; } @@ -558,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev) WARN_ON(zdev->s390_domain); spin_lock_init(&zdev->iommu_bitmap_lock); - spin_lock_init(&zdev->dma_table_lock); zdev->dma_table = dma_alloc_cpu_table(); if (!zdev->dma_table) { diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 2b9a3e3bc6066..ed33c6cce0836 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -20,7 +20,6 @@ struct s390_domain { struct iommu_domain domain; struct list_head devices; unsigned long *dma_table; - spinlock_t dma_table_lock; spinlock_t list_lock; struct rcu_head rcu; }; @@ -62,7 +61,6 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) s390_domain->domain.geometry.aperture_start = 0; s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1; - spin_lock_init(&s390_domain->dma_table_lock); spin_lock_init(&s390_domain->list_lock); INIT_LIST_HEAD_RCU(&s390_domain->devices); @@ -265,14 +263,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, unsigned long nr_pages, int flags) { phys_addr_t page_addr = pa & PAGE_MASK; - unsigned long irq_flags, i; unsigned long *entry; + unsigned long i; int rc; - if (!nr_pages) - return 0; - - spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); if (unlikely(!entry)) { @@ -283,7 +277,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, page_addr += PAGE_SIZE; dma_addr += PAGE_SIZE; } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); return 0; @@ -296,7 +289,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, break; 
dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); return rc; } @@ -304,14 +296,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, dma_addr_t dma_addr, unsigned long nr_pages) { - unsigned long irq_flags, i; unsigned long *entry; + unsigned long i; int rc = 0; - if (!nr_pages) - return 0; - - spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); for (i = 0; i < nr_pages; i++) { entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); if (unlikely(!entry)) { @@ -321,7 +309,6 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); dma_addr += PAGE_SIZE; } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); return rc; } @@ -363,7 +350,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct s390_domain *s390_domain = to_s390_domain(domain); - unsigned long *sto, *pto, *rto, flags; + unsigned long *rto, *sto, *pto; + unsigned long ste, pte, rte; unsigned int rtx, sx, px; phys_addr_t phys = 0; @@ -376,16 +364,17 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, px = calc_px(iova); rto = s390_domain->dma_table; - spin_lock_irqsave(&s390_domain->dma_table_lock, flags); - if (rto && reg_entry_isvalid(rto[rtx])) { - sto = get_rt_sto(rto[rtx]); - if (sto && reg_entry_isvalid(sto[sx])) { - pto = get_st_pto(sto[sx]); - if (pto && pt_entry_isvalid(pto[px])) - phys = pto[px] & ZPCI_PTE_ADDR_MASK; + rte = READ_ONCE(rto[rtx]); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + ste = READ_ONCE(sto[sx]); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + pte = READ_ONCE(pto[px]); + if (pt_entry_isvalid(pte)) + phys = pte & ZPCI_PTE_ADDR_MASK; } } - spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags); return phys; } -- GitLab From bbc4d205d93f52ee18dfa7858d51489c0506547f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 10 Nov 2022 16:44:07 +0100 Subject: [PATCH 278/694] iommu/exynos: Fix driver initialization sequence Registering a SYSMMU platform driver might directly trigger initializing IOMMU domains and performing the initial mappings. Also the IOMMU core might use the IOMMU hardware once it has been registered with iommu_device_register() function. Ensure that all driver resources are allocated and initialized before the driver advertise its presence to the platform bus and the IOMMU subsystem. 
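The ordering rule itself is generic. A compact stand-alone sketch (invented helper names, not the exynos code) of "initialise everything first, advertise last, unwind in reverse order on failure" looks like this:

#include <stdio.h>

static int alloc_resources(void)    { puts("resources ready");    return 0; }
static void free_resources(void)    { puts("resources freed"); }
static int setup_hardware(void)     { puts("hardware ready");     return 0; }
static void teardown_hardware(void) { puts("hardware torn down"); }
static int advertise_device(void)   { puts("visible to the core"); return 0; }

static int probe(void)
{
    int ret;

    ret = alloc_resources();
    if (ret)
        return ret;

    ret = setup_hardware();
    if (ret)
        goto err_free;

    /* Only now may other code start using the device. */
    ret = advertise_device();
    if (ret)
        goto err_teardown;

    return 0;

err_teardown:
    teardown_hardware();
err_free:
    free_resources();
    return ret;
}

int main(void)
{
    return probe();
}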
Signed-off-by: Marek Szyprowski Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20221110154407.26531-1-m.szyprowski@samsung.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 45fd4850bacbd..b0cde22119875 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -708,10 +708,6 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) if (ret) return ret; - ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev); - if (ret) - goto err_iommu_register; - platform_set_drvdata(pdev, data); if (PG_ENT_SHIFT < 0) { @@ -743,11 +739,13 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); + ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev); + if (ret) + goto err_dma_set_mask; + return 0; err_dma_set_mask: - iommu_device_unregister(&data->iommu); -err_iommu_register: iommu_device_sysfs_remove(&data->iommu); return ret; } @@ -1432,12 +1430,6 @@ static int __init exynos_iommu_init(void) return -ENOMEM; } - ret = platform_driver_register(&exynos_sysmmu_driver); - if (ret) { - pr_err("%s: Failed to register driver\n", __func__); - goto err_reg_driver; - } - zero_lv2_table = kmem_cache_zalloc(lv2table_kmem_cache, GFP_KERNEL); if (zero_lv2_table == NULL) { pr_err("%s: Failed to allocate zero level2 page table\n", @@ -1446,10 +1438,16 @@ static int __init exynos_iommu_init(void) goto err_zero_lv2; } + ret = platform_driver_register(&exynos_sysmmu_driver); + if (ret) { + pr_err("%s: Failed to register driver\n", __func__); + goto err_reg_driver; + } + return 0; -err_zero_lv2: - platform_driver_unregister(&exynos_sysmmu_driver); err_reg_driver: + platform_driver_unregister(&exynos_sysmmu_driver); +err_zero_lv2: kmem_cache_destroy(lv2table_kmem_cache); return ret; } -- GitLab From b577f7e679b763b706032e7a65c7b3a05c5f2184 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:35 +0000 Subject: [PATCH 279/694] iommu/mediatek-v1: Update to {map,unmap}_pages Now that the core API has a proper notion of multi-page mappings, clean up the old pgsize_bitmap hack by implementing the new interfaces instead. This also brings a slight simplification since we no longer need to worry about rolling back partial mappings on failure. 
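For reference, the contract behind the new callbacks can be modelled in a few lines of stand-alone C (the flat pgt[] and the model_map_pages name are invented; this is not the driver code): the core hands over one page size plus a page count, the callback reports the bytes it actually mapped through *mapped, and -EEXIST is returned when it stops early on an already-valid entry, just as in the conversion above.

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define PAGE_SZ  4096u
#define NR_PTES  32

static uint32_t pgt[NR_PTES];   /* 0 == not mapped */

static int model_map_pages(unsigned long iova, uint32_t pa,
                           size_t pgsize, size_t pgcount, size_t *mapped)
{
    uint32_t *pte = &pgt[iova / PAGE_SZ];
    size_t i;

    for (i = 0; i < pgcount; i++) {
        if (pte[i])             /* already mapped: stop, keep the progress */
            break;
        pte[i] = pa | 0x1;      /* valid bit */
        pa += pgsize;
    }

    *mapped = i * pgsize;
    return i == pgcount ? 0 : -EEXIST;
}

int main(void)
{
    size_t mapped;
    int ret;

    ret = model_map_pages(0, 0x10000, PAGE_SZ, 4, &mapped);
    printf("ret %d, mapped %zu bytes\n", ret, mapped);      /* 0, 16384 */

    ret = model_map_pages(2 * PAGE_SZ, 0x20000, PAGE_SZ, 4, &mapped);
    printf("ret %d, mapped %zu bytes\n", ret, mapped);      /* -EEXIST, 0 */
    return 0;
}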
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/768e90ff0c2d61e4723049c1349d8bac58daa437.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 6e0e65831eb70..69682ee068d2b 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -327,44 +327,42 @@ static void mtk_iommu_v1_detach_device(struct iommu_domain *domain, struct devic } static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); - unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; unsigned long flags; unsigned int i; u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); u32 pabase = (u32)paddr; - int map_size = 0; spin_lock_irqsave(&dom->pgtlock, flags); - for (i = 0; i < page_num; i++) { - if (pgt_base_iova[i]) { - memset(pgt_base_iova, 0, i * sizeof(u32)); + for (i = 0; i < pgcount; i++) { + if (pgt_base_iova[i]) break; - } pgt_base_iova[i] = pabase | F_DESC_VALID | F_DESC_NONSEC; pabase += MT2701_IOMMU_PAGE_SIZE; - map_size += MT2701_IOMMU_PAGE_SIZE; } spin_unlock_irqrestore(&dom->pgtlock, flags); - mtk_iommu_v1_tlb_flush_range(dom->data, iova, size); + *mapped = i * MT2701_IOMMU_PAGE_SIZE; + mtk_iommu_v1_tlb_flush_range(dom->data, iova, *mapped); - return map_size == size ? 0 : -EEXIST; + return i == pgcount ? 0 : -EEXIST; } static size_t mtk_iommu_v1_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain); unsigned long flags; u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); - unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; + size_t size = pgcount * MT2701_IOMMU_PAGE_SIZE; spin_lock_irqsave(&dom->pgtlock, flags); - memset(pgt_base_iova, 0, page_num * sizeof(u32)); + memset(pgt_base_iova, 0, pgcount * sizeof(u32)); spin_unlock_irqrestore(&dom->pgtlock, flags); mtk_iommu_v1_tlb_flush_range(dom->data, iova, size); @@ -586,13 +584,13 @@ static const struct iommu_ops mtk_iommu_v1_ops = { .release_device = mtk_iommu_v1_release_device, .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, - .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, + .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_v1_attach_device, .detach_dev = mtk_iommu_v1_detach_device, - .map = mtk_iommu_v1_map, - .unmap = mtk_iommu_v1_unmap, + .map_pages = mtk_iommu_v1_map, + .unmap_pages = mtk_iommu_v1_unmap, .iova_to_phys = mtk_iommu_v1_iova_to_phys, .free = mtk_iommu_v1_domain_free, } -- GitLab From a05d5857cec3efef02af557dcb5ed257364356e6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:36 +0000 Subject: [PATCH 280/694] iommu/sprd: Update to {map,unmap}_pages Now that the core API has a proper notion of multi-page mappings, clean up the old pgsize_bitmap hack by implementing the new interfaces instead. This time we'll get the return values for unmaps correct too. 
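Concretely, .unmap_pages returns a byte count rather than an errno: report how many bytes were actually unmapped, and 0 when nothing was (for example when the range falls outside the domain aperture), instead of -EINVAL. Roughly, with hypothetical helpers:

  static size_t foo_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
                                size_t pgsize, size_t pgcount,
                                struct iommu_iotlb_gather *gather)
  {
          size_t size = pgsize * pgcount;

          if (!foo_range_in_aperture(domain, iova, size))
                  return 0;               /* nothing unmapped, not -EINVAL */

          foo_clear_ptes(to_foo_domain(domain), iova, pgcount);
          return size;                    /* bytes actually unmapped */
  }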
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/9026464e8380b92d10d09103e215eb4306a5df7c.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/sprd-iommu.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index e027933755989..219bfa11f7f48 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -271,10 +271,11 @@ static void sprd_iommu_detach_device(struct iommu_domain *domain, } static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); - unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + size_t size = pgcount * SPRD_IOMMU_PAGE_SIZE; unsigned long flags; unsigned int i; u32 *pgt_base_iova; @@ -296,35 +297,37 @@ static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); spin_lock_irqsave(&dom->pgtlock, flags); - for (i = 0; i < page_num; i++) { + for (i = 0; i < pgcount; i++) { pgt_base_iova[i] = pabase >> SPRD_IOMMU_PAGE_SHIFT; pabase += SPRD_IOMMU_PAGE_SIZE; } spin_unlock_irqrestore(&dom->pgtlock, flags); + *mapped = size; return 0; } static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); unsigned long flags; u32 *pgt_base_iova; - unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + size_t size = pgcount * SPRD_IOMMU_PAGE_SIZE; unsigned long start = domain->geometry.aperture_start; unsigned long end = domain->geometry.aperture_end; if (iova < start || (iova + size) > (end + 1)) - return -EINVAL; + return 0; pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); spin_lock_irqsave(&dom->pgtlock, flags); - memset(pgt_base_iova, 0, page_num * sizeof(u32)); + memset(pgt_base_iova, 0, pgcount * sizeof(u32)); spin_unlock_irqrestore(&dom->pgtlock, flags); - return 0; + return size; } static void sprd_iommu_sync_map(struct iommu_domain *domain, @@ -407,13 +410,13 @@ static const struct iommu_ops sprd_iommu_ops = { .probe_device = sprd_iommu_probe_device, .device_group = sprd_iommu_device_group, .of_xlate = sprd_iommu_of_xlate, - .pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT, + .pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sprd_iommu_attach_device, .detach_dev = sprd_iommu_detach_device, - .map = sprd_iommu_map, - .unmap = sprd_iommu_unmap, + .map_pages = sprd_iommu_map, + .unmap_pages = sprd_iommu_unmap, .iotlb_sync_map = sprd_iommu_sync_map, .iotlb_sync = sprd_iommu_sync, .iova_to_phys = sprd_iommu_iova_to_phys, -- GitLab From 85637380dad6d97071018cba6f2aa90667f716b3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:37 +0000 Subject: [PATCH 281/694] iommu/mediatek: Update to {map,unmap}_pages Update map/unmap to the new multi-page interfaces, which is dead easy since we just pass them through to io-pgtable anyway. 
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/25b65b71e7e5d1006469aee48bab07ca87227bfa.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b383c8327f9cb..6b8ad85b50ed3 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -711,7 +711,8 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain, } static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); @@ -720,17 +721,17 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, paddr |= BIT_ULL(32); /* Synchronize with the tlb_lock */ - return dom->iop->map(dom->iop, iova, paddr, size, prot, gfp); + return dom->iop->map_pages(dom->iop, iova, paddr, pgsize, pgcount, prot, gfp, mapped); } static size_t mtk_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size, + unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - iommu_iotlb_gather_add_range(gather, iova, size); - return dom->iop->unmap(dom->iop, iova, size, gather); + iommu_iotlb_gather_add_range(gather, iova, pgsize * pgcount); + return dom->iop->unmap_pages(dom->iop, iova, pgsize, pgcount, gather); } static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) @@ -938,8 +939,8 @@ static const struct iommu_ops mtk_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_attach_device, .detach_dev = mtk_iommu_detach_device, - .map = mtk_iommu_map, - .unmap = mtk_iommu_unmap, + .map_pages = mtk_iommu_map, + .unmap_pages = mtk_iommu_unmap, .flush_iotlb_all = mtk_iommu_flush_iotlb_all, .iotlb_sync = mtk_iommu_iotlb_sync, .iotlb_sync_map = mtk_iommu_sync_map, -- GitLab From 8b35cdcf9bf82098dd15ed02a2a51cdf5f5ca090 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:38 +0000 Subject: [PATCH 282/694] iommu/msm: Update to {map,unmap}_pages Update map/unmap to the new multi-page interfaces, which is dead easy since we just pass them through to io-pgtable anyway. 
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/24a8f522710ddd6bbac4da154aa28799e939ebe4.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 16179a9a72830..c60624910872c 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -471,14 +471,16 @@ static void msm_iommu_detach_dev(struct iommu_domain *domain, } static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t pa, size_t len, int prot, gfp_t gfp) + phys_addr_t pa, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; int ret; spin_lock_irqsave(&priv->pgtlock, flags); - ret = priv->iop->map(priv->iop, iova, pa, len, prot, GFP_ATOMIC); + ret = priv->iop->map_pages(priv->iop, iova, pa, pgsize, pgcount, prot, + GFP_ATOMIC, mapped); spin_unlock_irqrestore(&priv->pgtlock, flags); return ret; @@ -493,16 +495,18 @@ static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, } static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t len, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; + size_t ret; spin_lock_irqsave(&priv->pgtlock, flags); - len = priv->iop->unmap(priv->iop, iova, len, gather); + ret = priv->iop->unmap_pages(priv->iop, iova, pgsize, pgcount, gather); spin_unlock_irqrestore(&priv->pgtlock, flags); - return len; + return ret; } static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, @@ -679,8 +683,8 @@ static struct iommu_ops msm_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = msm_iommu_attach_dev, .detach_dev = msm_iommu_detach_dev, - .map = msm_iommu_map, - .unmap = msm_iommu_unmap, + .map_pages = msm_iommu_map, + .unmap_pages = msm_iommu_unmap, /* * Nothing is needed here, the barrier to guarantee * completion of the tlb sync operation is implicitly -- GitLab From 0a17bbab2330aecd026696d4decc2636bd31e790 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:39 +0000 Subject: [PATCH 283/694] iommu/ipmmu-vmsa: Update to {map,unmap}_pages Update map/unmap to the new multi-page interfaces, which is dead easy since we just pass them through to io-pgtable anyway. Since these are domain ops now, the domain is inherently valid (not to mention that container_of() wouldn't return NULL anyway), so garbage-collect that check in the process. 
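The removed NULL check was dead code because the domain conversion helper is plain pointer arithmetic; sketched here with a generic name, not the exact ipmmu definition:

  static struct foo_domain *to_foo_domain(struct iommu_domain *dom)
  {
          /*
           * container_of() only subtracts the member offset, so it cannot
           * produce NULL for the non-NULL domain pointer the core passes in.
           */
          return container_of(dom, struct foo_domain, domain);
  }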
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/ad859ccc24720d72f8eafd03817c1fc11255ddc1.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 22230cc15dcd1..a003bd5fc65c1 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -659,22 +659,22 @@ static void ipmmu_detach_device(struct iommu_domain *io_domain, } static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - if (!domain) - return -ENODEV; - - return domain->iop->map(domain->iop, iova, paddr, size, prot, gfp); + return domain->iop->map_pages(domain->iop, iova, paddr, pgsize, pgcount, + prot, gfp, mapped); } static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - return domain->iop->unmap(domain->iop, iova, size, gather); + return domain->iop->unmap_pages(domain->iop, iova, pgsize, pgcount, gather); } static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain) @@ -877,8 +877,8 @@ static const struct iommu_ops ipmmu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = ipmmu_attach_device, .detach_dev = ipmmu_detach_device, - .map = ipmmu_map, - .unmap = ipmmu_unmap, + .map_pages = ipmmu_map, + .unmap_pages = ipmmu_unmap, .flush_iotlb_all = ipmmu_flush_iotlb_all, .iotlb_sync = ipmmu_iotlb_sync, .iova_to_phys = ipmmu_iova_to_phys, -- GitLab From fa8ce5743039bc7ea5cb4217423efaebe381fc54 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:40 +0000 Subject: [PATCH 284/694] iommu/qcom: Update to {map,unmap}_pages Update map/unmap to the new multi-page interfaces, which is dead easy since we just pass them through to io-pgtable anyway. 
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/ccff9a133d12ec938741720be6baf5d788b71ea0.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index bfd7b51eb5dbf..270c3d9128bab 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -410,7 +410,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { int ret; unsigned long flags; @@ -421,13 +422,14 @@ static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, return -ENODEV; spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot, GFP_ATOMIC); + ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, GFP_ATOMIC, mapped); spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); return ret; } static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { size_t ret; unsigned long flags; @@ -444,7 +446,7 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, */ pm_runtime_get_sync(qcom_domain->iommu->dev); spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size, gather); + ret = ops->unmap_pages(ops, iova, pgsize, pgcount, gather); spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); pm_runtime_put_sync(qcom_domain->iommu->dev); @@ -582,8 +584,8 @@ static const struct iommu_ops qcom_iommu_ops = { .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = qcom_iommu_attach_dev, .detach_dev = qcom_iommu_detach_dev, - .map = qcom_iommu_map, - .unmap = qcom_iommu_unmap, + .map_pages = qcom_iommu_map, + .unmap_pages = qcom_iommu_unmap, .flush_iotlb_all = qcom_iommu_flush_iotlb_all, .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, -- GitLab From 99cbb8e436344ddd0554108a3d8afb7ce5c4994e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:41 +0000 Subject: [PATCH 285/694] iommu/io-pgtable-arm: Remove map/unmap With all users now calling {map,unmap}_pages, remove the wrappers. 
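For callers of struct io_pgtable_ops, the old single-page calls translate mechanically to the multi-page ones with a page count of 1, as the selftest conversion below shows; in sketch form:

  size_t mapped;

  /* was: ops->map(ops, iova, paddr, size, prot, GFP_KERNEL) */
  ops->map_pages(ops, iova, paddr, size, 1, prot, GFP_KERNEL, &mapped);

  /* was: ops->unmap(ops, iova, size, NULL) */
  ops->unmap_pages(ops, iova, size, 1, NULL);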
Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/162e58e83ed42f78c3fbefe78c9b5410dd1dc412.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm.c | 42 ++++++++++++---------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 0ba817e863465..72dcdd468cf30 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -360,7 +360,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start; num_entries = min_t(int, pgcount, max_entries); ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep); - if (!ret && mapped) + if (!ret) *mapped += num_entries * size; return ret; @@ -496,13 +496,6 @@ static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova, return ret; } -static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp) -{ - return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp, - NULL); -} - static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *ptep) { @@ -682,12 +675,6 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov data->start_level, ptep); } -static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) -{ - return arm_lpae_unmap_pages(ops, iova, size, 1, gather); -} - static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) { @@ -799,9 +786,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1)); data->iop.ops = (struct io_pgtable_ops) { - .map = arm_lpae_map, .map_pages = arm_lpae_map_pages, - .unmap = arm_lpae_unmap, .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, }; @@ -1176,7 +1161,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) int i, j; unsigned long iova; - size_t size; + size_t size, mapped; struct io_pgtable_ops *ops; selftest_running = true; @@ -1209,15 +1194,16 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { size = 1UL << j; - if (ops->map(ops, iova, iova, size, IOMMU_READ | - IOMMU_WRITE | - IOMMU_NOEXEC | - IOMMU_CACHE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_READ | IOMMU_WRITE | + IOMMU_NOEXEC | IOMMU_CACHE, + GFP_KERNEL, &mapped)) return __FAIL(ops, i); /* Overlapping mappings */ - if (!ops->map(ops, iova, iova + size, size, - IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL)) + if (!ops->map_pages(ops, iova, iova + size, size, 1, + IOMMU_READ | IOMMU_NOEXEC, + GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) @@ -1228,11 +1214,12 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) /* Partial unmap */ size = 1UL << __ffs(cfg->pgsize_bitmap); - if (ops->unmap(ops, SZ_1G + size, size, NULL) != size) + if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size) return __FAIL(ops, i); /* Remap of partial unmap */ - if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ, GFP_KERNEL)) + if (ops->map_pages(ops, SZ_1G + size, size, size, 1, + IOMMU_READ, GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, SZ_1G + size + 42) 
!= (size + 42)) @@ -1243,14 +1230,15 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { size = 1UL << j; - if (ops->unmap(ops, iova, size, NULL) != size) + if (ops->unmap_pages(ops, iova, size, 1, NULL) != size) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42)) return __FAIL(ops, i); /* Remap full block */ - if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_WRITE, GFP_KERNEL, &mapped)) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) -- GitLab From b9bf41e249f8c8bf79389cec9d29faf03f79aad2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:42 +0000 Subject: [PATCH 286/694] iommu/io-pgtable-arm-v7s: Remove map/unmap With all users now calling {map,unmap}_pages, remove the wrappers. Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/98481dd7e3576b74149ce2de8f217338ee1dd490.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 41 +++++++++++------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index ba3115fd0f86a..75f244a3e12df 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -564,8 +564,7 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova, iova += pgsize; paddr += pgsize; - if (mapped) - *mapped += pgsize; + *mapped += pgsize; } /* * Synchronise all PTE updates for the new mapping before there's @@ -576,12 +575,6 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova, return ret; } -static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) -{ - return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL); -} - static void arm_v7s_free_pgtable(struct io_pgtable *iop) { struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); @@ -764,12 +757,6 @@ static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova return unmapped; } -static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) -{ - return arm_v7s_unmap_pages(ops, iova, size, 1, gather); -} - static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) { @@ -842,9 +829,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, goto out_free_data; data->iop.ops = (struct io_pgtable_ops) { - .map = arm_v7s_map, .map_pages = arm_v7s_map_pages, - .unmap = arm_v7s_unmap, .unmap_pages = arm_v7s_unmap_pages, .iova_to_phys = arm_v7s_iova_to_phys, }; @@ -954,6 +939,7 @@ static int __init arm_v7s_do_selftests(void) }; unsigned int iova, size, iova_start; unsigned int i, loopnr = 0; + size_t mapped; selftest_running = true; @@ -984,15 +970,16 @@ static int __init arm_v7s_do_selftests(void) iova = 0; for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { size = 1UL << i; - if (ops->map(ops, iova, iova, size, IOMMU_READ | - IOMMU_WRITE | - IOMMU_NOEXEC | - IOMMU_CACHE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, + IOMMU_READ | IOMMU_WRITE | + IOMMU_NOEXEC | IOMMU_CACHE, + GFP_KERNEL, &mapped)) return __FAIL(ops); /* Overlapping mappings */ - if (!ops->map(ops, iova, iova + size, size, - IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL)) + if 
(!ops->map_pages(ops, iova, iova + size, size, 1, + IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL, + &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) @@ -1007,11 +994,12 @@ static int __init arm_v7s_do_selftests(void) size = 1UL << __ffs(cfg.pgsize_bitmap); while (i < loopnr) { iova_start = i * SZ_16M; - if (ops->unmap(ops, iova_start + size, size, NULL) != size) + if (ops->unmap_pages(ops, iova_start + size, size, 1, NULL) != size) return __FAIL(ops); /* Remap of partial unmap */ - if (ops->map(ops, iova_start + size, size, size, IOMMU_READ, GFP_KERNEL)) + if (ops->map_pages(ops, iova_start + size, size, size, 1, + IOMMU_READ, GFP_KERNEL, &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova_start + size + 42) @@ -1025,14 +1013,15 @@ static int __init arm_v7s_do_selftests(void) for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { size = 1UL << i; - if (ops->unmap(ops, iova, size, NULL) != size) + if (ops->unmap_pages(ops, iova, size, 1, NULL) != size) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42)) return __FAIL(ops); /* Remap full block */ - if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL)) + if (ops->map_pages(ops, iova, iova, size, 1, IOMMU_WRITE, + GFP_KERNEL, &mapped)) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) -- GitLab From b169a180bef26679b44484ad24b7d8ae32623a10 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 15 Nov 2022 15:26:43 +0000 Subject: [PATCH 287/694] iommu/io-pgtable: Remove map/unmap With all users now calling {map,unmap}_pages, retire the redundant single-page callbacks. Signed-off-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/a5a3cbf95c3279982e378cc43dad830322a59868.1668100209.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 1f068dfdb140c..1b7a44b35616c 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -150,9 +150,7 @@ struct io_pgtable_cfg { /** * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * - * @map: Map a physically contiguous memory region. * @map_pages: Map a physically contiguous range of pages of the same size. - * @unmap: Unmap a physically contiguous memory region. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. * @@ -160,13 +158,9 @@ struct io_pgtable_cfg { * the same names. */ struct io_pgtable_ops { - int (*map)(struct io_pgtable_ops *ops, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather); size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather); -- GitLab From 6cf0981c2233f97d56938d9d61845383d6eb227c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 18 Nov 2022 17:36:04 +0800 Subject: [PATCH 288/694] iommu/amd: Fix pci device refcount leak in ppr_notifier() As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount increment, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). 
So call it before returning from ppr_notifier() to avoid refcount leak. Fixes: daae2d25a477 ("iommu/amd: Don't copy GCR3 table root pointer") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221118093604.216371-1-yangyingliang@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 6a1f02c62dffc..9f7fab49a5a90 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -587,6 +587,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) put_device_state(dev_state); out: + pci_dev_put(pdev); return ret; } -- GitLab From b09b56734fae28be9332021ae3e84c9b05020fda Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Fri, 18 Nov 2022 13:42:52 +0300 Subject: [PATCH 289/694] iommu/amd: Check return value of mmu_notifier_register() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Аdded a return value check for the function mmu_notifier_register(). Return value of a function 'mmu_notifier_register' called at iommu_v2.c:642 is not checked, but it is usually checked for this function Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Denis Arefev Link: https://lore.kernel.org/r/20221118104252.122809-1-arefev@swemel.ru [joro: Fix commit message ] Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 9f7fab49a5a90..864e4ffb6aa94 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -640,7 +640,9 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, if (pasid_state->mm == NULL) goto out_free; - mmu_notifier_register(&pasid_state->mn, mm); + ret = mmu_notifier_register(&pasid_state->mn, mm); + if (ret) + goto out_free; ret = set_pasid_state(dev_state, pasid_state, pasid); if (ret) -- GitLab From 0d2573a2b7838a4f6934c2835e6730b38df4bcc9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 9 Nov 2022 14:30:55 +0100 Subject: [PATCH 290/694] modpost: Join broken long printed messages Breaking long printed messages in multiple lines makes it very hard to look up where they originated from. Signed-off-by: Geert Uytterhoeven Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/mod/file2alias.c | 18 +++++++----------- scripts/mod/modpost.c | 8 +++----- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 80d973144fded..7df23905fdf1c 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -140,25 +140,22 @@ static void device_id_check(const char *modname, const char *device_id, int i; if (size % id_size || size < id_size) { - fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo " - "of the size of " - "section __mod_%s___device_table=%lu.\n" - "Fix definition of struct %s_device_id " - "in mod_devicetable.h\n", + fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo of the size of section __mod_%s___device_table=%lu.\n" + "Fix definition of struct %s_device_id in mod_devicetable.h\n", modname, device_id, id_size, device_id, size, device_id); } /* Verify last one is a terminator */ for (i = 0; i < id_size; i++ ) { if (*(uint8_t*)(symval+size-id_size+i)) { - fprintf(stderr,"%s: struct %s_device_id is %lu bytes. 
" - "The last of %lu is:\n", + fprintf(stderr, + "%s: struct %s_device_id is %lu bytes. The last of %lu is:\n", modname, device_id, id_size, size / id_size); for (i = 0; i < id_size; i++ ) fprintf(stderr,"0x%02x ", *(uint8_t*)(symval+size-id_size+i) ); fprintf(stderr,"\n"); - fatal("%s: struct %s_device_id is not terminated " - "with a NULL entry!\n", modname, device_id); + fatal("%s: struct %s_device_id is not terminated with a NULL entry!\n", + modname, device_id); } } } @@ -1154,8 +1151,7 @@ static int do_amba_entry(const char *filename, DEF_FIELD(symval, amba_id, mask); if ((id & mask) != id) - fatal("%s: Masked-off bit(s) of AMBA device ID are non-zero: " - "id=0x%08X, mask=0x%08X. Please fix this driver.\n", + fatal("%s: Masked-off bit(s) of AMBA device ID are non-zero: id=0x%08X, mask=0x%08X. Please fix this driver.\n", filename, id, mask); p += sprintf(alias, "amba:d"); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 2c80da0220c32..56d856f2e5115 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -519,9 +519,8 @@ static int parse_elf(struct elf_info *info, const char *filename) int nobits = sechdrs[i].sh_type == SHT_NOBITS; if (!nobits && sechdrs[i].sh_offset > info->size) { - fatal("%s is truncated. sechdrs[i].sh_offset=%lu > " - "sizeof(*hrd)=%zu\n", filename, - (unsigned long)sechdrs[i].sh_offset, + fatal("%s is truncated. sechdrs[i].sh_offset=%lu > sizeof(*hrd)=%zu\n", + filename, (unsigned long)sechdrs[i].sh_offset, sizeof(*hdr)); return 0; } @@ -1355,8 +1354,7 @@ static void report_extable_warnings(const char* modname, struct elf_info* elf, get_pretty_name(is_function(tosym), &to_pretty_name, &to_pretty_name_p); - warn("%s(%s+0x%lx): Section mismatch in reference" - " from the %s %s%s to the %s %s:%s%s\n", + warn("%s(%s+0x%lx): Section mismatch in reference from the %s %s%s to the %s %s:%s%s\n", modname, fromsec, (long)r->r_offset, from_pretty_name, fromsym_name, from_pretty_name_p, to_pretty_name, tosec, tosym_name, to_pretty_name_p); -- GitLab From 9f8fe647797a4bc049bc7cceaf3a63584678ba04 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 10 Nov 2022 11:59:05 -0800 Subject: [PATCH 291/694] Makefile.debug: support for -gz=zstd Make DEBUG_INFO_COMPRESSED a choice; DEBUG_INFO_COMPRESSED_NONE is the default, DEBUG_INFO_COMPRESSED_ZLIB uses zlib, DEBUG_INFO_COMPRESSED_ZSTD uses zstd. This renames the existing KConfig option DEBUG_INFO_COMPRESSED to DEBUG_INFO_COMPRESSED_ZLIB so users upgrading may need to reset the new Kconfigs. 
Some quick N=1 measurements with du, /usr/bin/time -v, and bloaty: clang-16, x86_64 defconfig plus CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_NONE=y: Elapsed (wall clock) time (h:mm:ss or m:ss): 0:55.43 488M vmlinux 27.6% 136Mi 0.0% 0 .debug_info 6.1% 30.2Mi 0.0% 0 .debug_str_offsets 3.5% 17.2Mi 0.0% 0 .debug_line 3.3% 16.3Mi 0.0% 0 .debug_loclists 0.9% 4.62Mi 0.0% 0 .debug_str clang-16, x86_64 defconfig plus CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_ZLIB=y: Elapsed (wall clock) time (h:mm:ss or m:ss): 1:00.35 385M vmlinux 21.8% 85.4Mi 0.0% 0 .debug_info 2.1% 8.26Mi 0.0% 0 .debug_str_offsets 2.1% 8.24Mi 0.0% 0 .debug_loclists 1.9% 7.48Mi 0.0% 0 .debug_line 0.5% 1.94Mi 0.0% 0 .debug_str clang-16, x86_64 defconfig plus CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_ZSTD=y: Elapsed (wall clock) time (h:mm:ss or m:ss): 0:59.69 373M vmlinux 21.4% 81.4Mi 0.0% 0 .debug_info 2.3% 8.85Mi 0.0% 0 .debug_loclists 1.5% 5.71Mi 0.0% 0 .debug_line 0.5% 1.95Mi 0.0% 0 .debug_str_offsets 0.4% 1.62Mi 0.0% 0 .debug_str That's only a 3.11% overall binary size savings over zlib, but at no performance regression. Link: https://maskray.me/blog/2022-09-09-zstd-compressed-debug-sections Link: https://maskray.me/blog/2022-01-23-compressed-debug-sections Suggested-by: Sedat Dilek (DHL Supply Chain) Reviewed-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- lib/Kconfig.debug | 29 +++++++++++++++++++++++++++-- scripts/Makefile.debug | 6 +++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c3c0b077ade33..d93dbe5a1d143 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -312,8 +312,21 @@ config DEBUG_INFO_REDUCED DEBUG_INFO build and compile times are reduced too. Only works with newer gcc versions. -config DEBUG_INFO_COMPRESSED - bool "Compressed debugging information" +choice + prompt "Compressed Debug information" + help + Compress the resulting debug info. Results in smaller debug info sections, + but requires that consumers are able to decompress the results. + + If unsure, choose DEBUG_INFO_COMPRESSED_NONE. + +config DEBUG_INFO_COMPRESSED_NONE + bool "Don't compress debug information" + help + Don't compress debug info sections. + +config DEBUG_INFO_COMPRESSED_ZLIB + bool "Compress debugging information with zlib" depends on $(cc-option,-gz=zlib) depends on $(ld-option,--compress-debug-sections=zlib) help @@ -327,6 +340,18 @@ config DEBUG_INFO_COMPRESSED preferable to setting $KDEB_COMPRESS to "none" which would be even larger. +config DEBUG_INFO_COMPRESSED_ZSTD + bool "Compress debugging information with zstd" + depends on $(cc-option,-gz=zstd) + depends on $(ld-option,--compress-debug-sections=zstd) + help + Compress the debug information using zstd. This may provide better + compression than zlib, for about the same time costs, but requires newer + toolchain support. Requires GCC 13.0+ or Clang 16.0+, binutils 2.40+, and + zstd. 
+ +endchoice # "Compressed Debug information" + config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" depends on $(cc-option,-gsplit-dwarf) diff --git a/scripts/Makefile.debug b/scripts/Makefile.debug index 332c486f705f7..059ff38fe0cb3 100644 --- a/scripts/Makefile.debug +++ b/scripts/Makefile.debug @@ -27,10 +27,14 @@ else DEBUG_RUSTFLAGS += -Cdebuginfo=2 endif -ifdef CONFIG_DEBUG_INFO_COMPRESSED +ifdef CONFIG_DEBUG_INFO_COMPRESSED_ZLIB DEBUG_CFLAGS += -gz=zlib KBUILD_AFLAGS += -gz=zlib KBUILD_LDFLAGS += --compress-debug-sections=zlib +else ifdef CONFIG_DEBUG_INFO_COMPRESSED_ZSTD +DEBUG_CFLAGS += -gz=zstd +KBUILD_AFLAGS += -gz=zstd +KBUILD_LDFLAGS += --compress-debug-sections=zstd endif KBUILD_CFLAGS += $(DEBUG_CFLAGS) -- GitLab From 30daacc571d1416f24abd4cc49910ff9322a8cf6 Mon Sep 17 00:00:00 2001 From: KaiLong Wang Date: Sun, 13 Nov 2022 17:29:50 +0800 Subject: [PATCH 292/694] modpost: fix array_size.cocci warning Fix following coccicheck warning: scripts/mod/sumversion.c:219:48-49: WARNING: Use ARRAY_SIZE scripts/mod/sumversion.c:156:48-49: WARNING: Use ARRAY_SIZE Signed-off-by: KaiLong Wang Signed-off-by: Masahiro Yamada --- scripts/mod/sumversion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 6bf9caca09684..31066bfdba04e 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -153,7 +153,7 @@ static void md4_transform(uint32_t *hash, uint32_t const *in) static inline void md4_transform_helper(struct md4_ctx *ctx) { - le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(uint32_t)); + le32_to_cpu_array(ctx->block, ARRAY_SIZE(ctx->block)); md4_transform(ctx->hash, ctx->block); } @@ -216,7 +216,7 @@ static void md4_final_ascii(struct md4_ctx *mctx, char *out, unsigned int len) le32_to_cpu_array(mctx->block, (sizeof(mctx->block) - sizeof(uint64_t)) / sizeof(uint32_t)); md4_transform(mctx->hash, mctx->block); - cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(uint32_t)); + cpu_to_le32_array(mctx->hash, ARRAY_SIZE(mctx->hash)); snprintf(out, len, "%08X%08X%08X%08X", mctx->hash[0], mctx->hash[1], mctx->hash[2], mctx->hash[3]); -- GitLab From 1791360cb37ff5ef797afe9006cb315ebb7e969e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 13 Nov 2022 19:59:41 +0900 Subject: [PATCH 293/694] kconfig: remove unneeded variable in get_prompt_str() The variable 'accessible' is redundant. Signed-off-by: Masahiro Yamada --- scripts/kconfig/menu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 109325f31bef3..b90fff833588a 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -724,10 +724,8 @@ static void get_prompt_str(struct gstr *r, struct property *prop, menu = prop->menu; for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) { - bool accessible = menu_is_visible(menu); - submenu[i++] = menu; - if (location == NULL && accessible) + if (location == NULL && menu_is_visible(menu)) location = menu; } if (head && location) { -- GitLab From 4d980fd111237ab64705b982f61f284c2a7885e5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 13 Nov 2022 19:59:42 +0900 Subject: [PATCH 294/694] kconfig: remove const qualifier from str_get() update_text() apparently edits the buffer returned by str_get(). (and there is no reason why it shouldn't) Remove 'const' quailifier and casting. 
Signed-off-by: Masahiro Yamada --- scripts/kconfig/lkc.h | 2 +- scripts/kconfig/mconf.c | 4 ++-- scripts/kconfig/util.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 6ac2eabe109d2..e7118d62a45fc 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -76,7 +76,7 @@ struct gstr str_new(void); void str_free(struct gstr *gs); void str_append(struct gstr *gs, const char *s); void str_printf(struct gstr *gs, const char *fmt, ...); -const char *str_get(struct gstr *gs); +char *str_get(struct gstr *gs); /* menu.c */ void _menu_init(void); diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 9d3cf510562f8..d7f7e1bf7dd4f 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -440,8 +440,8 @@ static void search_conf(void) res = get_relations_str(sym_arr, &head); set_subtitle(); - dres = show_textbox_ext("Search Results", (char *) - str_get(&res), 0, 0, keys, &vscroll, + dres = show_textbox_ext("Search Results", str_get(&res), 0, 0, + keys, &vscroll, &hscroll, &update_text, (void *) &data); again = false; diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c index 29585394df71d..b78f114ad48cc 100644 --- a/scripts/kconfig/util.c +++ b/scripts/kconfig/util.c @@ -74,7 +74,7 @@ void str_printf(struct gstr *gs, const char *fmt, ...) } /* Retrieve value of growable string */ -const char *str_get(struct gstr *gs) +char *str_get(struct gstr *gs) { return gs->s; } -- GitLab From be5ea98983efe2a2c5156c3b43e35a076d5b640d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 13 Nov 2022 19:59:43 +0900 Subject: [PATCH 295/694] kconfig: remove redundant (void *) cast in search_conf() The (void *) cast is redundant because the last argument of show_textbox_ext() is an opaque pointer. Signed-off-by: Masahiro Yamada --- scripts/kconfig/mconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index d7f7e1bf7dd4f..9c549683c6275 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -441,8 +441,7 @@ static void search_conf(void) res = get_relations_str(sym_arr, &head); set_subtitle(); dres = show_textbox_ext("Search Results", str_get(&res), 0, 0, - keys, &vscroll, - &hscroll, &update_text, (void *) + keys, &vscroll, &hscroll, &update_text, &data); again = false; for (i = 0; i < JUMP_NB && keys[i]; i++) -- GitLab From f8f4dc7685c72c8ef86420566a38a4f786613851 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 15 Nov 2022 03:10:55 +0900 Subject: [PATCH 296/694] scripts/jobserver-exec: parse the last --jobserver-auth= option In the GNU Make manual, the section "Sharing Job Slots with GNU make" says: Be aware that the MAKEFLAGS variable may contain multiple instances of the --jobserver-auth= option. Only the last instance is relevant. Take the last element of the array, not the first. Link: https://www.gnu.org/software/make/manual/html_node/Job-Slots.html Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/jobserver-exec | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 8762887a970ce..4192855f5b8b5 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -23,7 +23,9 @@ try: opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] # Parse out R,W file descriptor numbers and set them nonblocking. 
- fds = opts[0].split("=", 1)[1] + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. + fds = opts[-1].split("=", 1)[1] reader, writer = [int(x) for x in fds.split(",", 1)] # Open a private copy of reader to avoid setting nonblocking # on an unexpecting process with the same reader fd. -- GitLab From 5724ac5589ad93d35d95a845222f566175c681b8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 17 Nov 2022 10:30:33 +0900 Subject: [PATCH 297/694] kbuild: deb-pkg: get rid of |flex:native workaround from Build-Depends "| flex:native" was a workaround (suggested by Ben, see Link) because "MultiArch: foreign" was missing in the flex package on some old distros when commit e3a22850664f ("deb-pkg: generate correct build dependencies") was applied. It seems fixing the flex package has been completed. Get rid of the workaround. Link: https://lore.kernel.org/linux-kbuild/ab49b0582ef12b14b1a68877263b81813e2492a2.camel@decadent.org.uk/ Link: https://wiki.debian.org/CrossBuildPackagingGuidelines Signed-off-by: Masahiro Yamada Reviewed-by: Ben Hutchings --- scripts/package/mkdebian | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index a3ac5a716e9fc..6cf383225b8b5 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -175,7 +175,7 @@ Section: kernel Priority: optional Maintainer: $maintainer Rules-Requires-Root: no -Build-Depends: bc, rsync, kmod, cpio, bison, flex | flex:native $extra_build_depends +Build-Depends: bc, rsync, kmod, cpio, bison, flex $extra_build_depends Homepage: https://www.kernel.org/ Package: $packagename-$version -- GitLab From 5e5ff73c2e5863f93fc5fd78d178cd8f2af12464 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Mon, 17 Oct 2022 20:04:50 +0530 Subject: [PATCH 298/694] asm-generic/io: Add _RET_IP_ to MMIO trace for more accurate debug info Due to compiler optimizations like inlining, there are cases where MMIO traces using _THIS_IP_ for caller information might not be sufficient to provide accurate debug traces. 1) With optimizations (Seen with GCC): In this case, _THIS_IP_ works fine and prints the caller information since it will be inlined into the caller and we get the debug traces on who made the MMIO access, for ex: rwmmio_read: qcom_smmu_tlb_sync+0xe0/0x1b0 width=32 addr=0xffff8000087447f4 rwmmio_post_read: qcom_smmu_tlb_sync+0xe0/0x1b0 width=32 val=0x0 addr=0xffff8000087447f4 2) Without optimizations (Seen with Clang): _THIS_IP_ will not be sufficient in this case as it will print only the MMIO accessors itself which is of not much use since it is not inlined as below for example: rwmmio_read: readl+0x4/0x80 width=32 addr=0xffff8000087447f4 rwmmio_post_read: readl+0x48/0x80 width=32 val=0x4 addr=0xffff8000087447f4 So in order to handle this second case as well irrespective of the compiler optimizations, add _RET_IP_ to MMIO trace to make it provide more accurate debug information in all these scenarios. 
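For context, the two helpers capture different addresses, which is why _RET_IP_ still identifies the caller even when the accessor is not inlined; in current kernels they are defined roughly as:

  /* address inside the current function body (the accessor, if not inlined) */
  #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })

  /* return address of the current function, i.e. the accessor's caller */
  #define _RET_IP_   (unsigned long)__builtin_return_address(0)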
Before: rwmmio_read: readl+0x4/0x80 width=32 addr=0xffff8000087447f4 rwmmio_post_read: readl+0x48/0x80 width=32 val=0x4 addr=0xffff8000087447f4 After: rwmmio_read: qcom_smmu_tlb_sync+0xe0/0x1b0 -> readl+0x4/0x80 width=32 addr=0xffff8000087447f4 rwmmio_post_read: qcom_smmu_tlb_sync+0xe0/0x1b0 -> readl+0x4/0x80 width=32 val=0x0 addr=0xffff8000087447f4 Fixes: 210031971cdd ("asm-generic/io: Add logging support for MMIO accessors") Signed-off-by: Sai Prakash Ranjan Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 80 +++++++++++++++++------------------ include/trace/events/rwmmio.h | 43 ++++++++++++------- lib/trace_readwrite.c | 16 +++---- 3 files changed, 75 insertions(+), 64 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index a68f8fbf423be..4c44a29b5e8ef 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -80,24 +80,24 @@ DECLARE_TRACEPOINT(rwmmio_read); DECLARE_TRACEPOINT(rwmmio_post_read); void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr); + unsigned long caller_addr, unsigned long caller_addr0); void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr); + unsigned long caller_addr, unsigned long caller_addr0); void log_read_mmio(u8 width, const volatile void __iomem *addr, - unsigned long caller_addr); + unsigned long caller_addr, unsigned long caller_addr0); void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, - unsigned long caller_addr); + unsigned long caller_addr, unsigned long caller_addr0); #else static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr) {} + unsigned long caller_addr, unsigned long caller_addr0) {} static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr) {} + unsigned long caller_addr, unsigned long caller_addr0) {} static inline void log_read_mmio(u8 width, const volatile void __iomem *addr, - unsigned long caller_addr) {} + unsigned long caller_addr, unsigned long caller_addr0) {} static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, - unsigned long caller_addr) {} + unsigned long caller_addr, unsigned long caller_addr0) {} #endif /* CONFIG_TRACE_MMIO_ACCESS */ @@ -188,11 +188,11 @@ static inline u8 readb(const volatile void __iomem *addr) { u8 val; - log_read_mmio(8, addr, _THIS_IP_); + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __raw_readb(addr); __io_ar(val); - log_post_read_mmio(val, 8, addr, _THIS_IP_); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -203,11 +203,11 @@ static inline u16 readw(const volatile void __iomem *addr) { u16 val; - log_read_mmio(16, addr, _THIS_IP_); + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); __io_ar(val); - log_post_read_mmio(val, 16, addr, _THIS_IP_); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -218,11 +218,11 @@ static inline u32 readl(const volatile void __iomem *addr) { u32 val; - log_read_mmio(32, addr, _THIS_IP_); + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); __io_ar(val); - log_post_read_mmio(val, 32, addr, _THIS_IP_); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -234,11 +234,11 @@ static inline u64 readq(const volatile void __iomem *addr) { 
u64 val; - log_read_mmio(64, addr, _THIS_IP_); + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le64_to_cpu(__raw_readq(addr)); __io_ar(val); - log_post_read_mmio(val, 64, addr, _THIS_IP_); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -248,11 +248,11 @@ static inline u64 readq(const volatile void __iomem *addr) #define writeb writeb static inline void writeb(u8 value, volatile void __iomem *addr) { - log_write_mmio(value, 8, addr, _THIS_IP_); + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writeb(value, addr); __io_aw(); - log_post_write_mmio(value, 8, addr, _THIS_IP_); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); } #endif @@ -260,11 +260,11 @@ static inline void writeb(u8 value, volatile void __iomem *addr) #define writew writew static inline void writew(u16 value, volatile void __iomem *addr) { - log_write_mmio(value, 16, addr, _THIS_IP_); + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writew((u16 __force)cpu_to_le16(value), addr); __io_aw(); - log_post_write_mmio(value, 16, addr, _THIS_IP_); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -272,11 +272,11 @@ static inline void writew(u16 value, volatile void __iomem *addr) #define writel writel static inline void writel(u32 value, volatile void __iomem *addr) { - log_write_mmio(value, 32, addr, _THIS_IP_); + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writel((u32 __force)__cpu_to_le32(value), addr); __io_aw(); - log_post_write_mmio(value, 32, addr, _THIS_IP_); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -285,11 +285,11 @@ static inline void writel(u32 value, volatile void __iomem *addr) #define writeq writeq static inline void writeq(u64 value, volatile void __iomem *addr) { - log_write_mmio(value, 64, addr, _THIS_IP_); + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writeq(__cpu_to_le64(value), addr); __io_aw(); - log_post_write_mmio(value, 64, addr, _THIS_IP_); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif #endif /* CONFIG_64BIT */ @@ -305,9 +305,9 @@ static inline u8 readb_relaxed(const volatile void __iomem *addr) { u8 val; - log_read_mmio(8, addr, _THIS_IP_); + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); val = __raw_readb(addr); - log_post_read_mmio(val, 8, addr, _THIS_IP_); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -318,9 +318,9 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr) { u16 val; - log_read_mmio(16, addr, _THIS_IP_); + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); val = __le16_to_cpu(__raw_readw(addr)); - log_post_read_mmio(val, 16, addr, _THIS_IP_); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -331,9 +331,9 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr) { u32 val; - log_read_mmio(32, addr, _THIS_IP_); + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); val = __le32_to_cpu(__raw_readl(addr)); - log_post_read_mmio(val, 32, addr, _THIS_IP_); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -344,9 +344,9 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) { u64 val; - log_read_mmio(64, addr, _THIS_IP_); + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); val = __le64_to_cpu(__raw_readq(addr)); - log_post_read_mmio(val, 64, addr, _THIS_IP_); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ 
-355,9 +355,9 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) #define writeb_relaxed writeb_relaxed static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) { - log_write_mmio(value, 8, addr, _THIS_IP_); + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); __raw_writeb(value, addr); - log_post_write_mmio(value, 8, addr, _THIS_IP_); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); } #endif @@ -365,9 +365,9 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) #define writew_relaxed writew_relaxed static inline void writew_relaxed(u16 value, volatile void __iomem *addr) { - log_write_mmio(value, 16, addr, _THIS_IP_); + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); __raw_writew(cpu_to_le16(value), addr); - log_post_write_mmio(value, 16, addr, _THIS_IP_); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -375,9 +375,9 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr) #define writel_relaxed writel_relaxed static inline void writel_relaxed(u32 value, volatile void __iomem *addr) { - log_write_mmio(value, 32, addr, _THIS_IP_); + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); __raw_writel(__cpu_to_le32(value), addr); - log_post_write_mmio(value, 32, addr, _THIS_IP_); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -385,9 +385,9 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr) #define writeq_relaxed writeq_relaxed static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) { - log_write_mmio(value, 64, addr, _THIS_IP_); + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __raw_writeq(__cpu_to_le64(value), addr); - log_post_write_mmio(value, 64, addr, _THIS_IP_); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif diff --git a/include/trace/events/rwmmio.h b/include/trace/events/rwmmio.h index de41159216c19..a43e5dd7436b5 100644 --- a/include/trace/events/rwmmio.h +++ b/include/trace/events/rwmmio.h @@ -12,12 +12,14 @@ DECLARE_EVENT_CLASS(rwmmio_rw_template, - TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr), + TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width, + volatile void __iomem *addr), - TP_ARGS(caller, val, width, addr), + TP_ARGS(caller, caller0, val, width, addr), TP_STRUCT__entry( __field(unsigned long, caller) + __field(unsigned long, caller0) __field(unsigned long, addr) __field(u64, val) __field(u8, width) @@ -25,56 +27,64 @@ DECLARE_EVENT_CLASS(rwmmio_rw_template, TP_fast_assign( __entry->caller = caller; + __entry->caller0 = caller0; __entry->val = val; __entry->addr = (unsigned long)addr; __entry->width = width; ), - TP_printk("%pS width=%d val=%#llx addr=%#lx", - (void *)__entry->caller, __entry->width, + TP_printk("%pS -> %pS width=%d val=%#llx addr=%#lx", + (void *)__entry->caller0, (void *)__entry->caller, __entry->width, __entry->val, __entry->addr) ); DEFINE_EVENT(rwmmio_rw_template, rwmmio_write, - TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr), - TP_ARGS(caller, val, width, addr) + TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width, + volatile void __iomem *addr), + TP_ARGS(caller, caller0, val, width, addr) ); DEFINE_EVENT(rwmmio_rw_template, rwmmio_post_write, - TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr), - TP_ARGS(caller, val, width, addr) + TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 
width, + volatile void __iomem *addr), + TP_ARGS(caller, caller0, val, width, addr) ); TRACE_EVENT(rwmmio_read, - TP_PROTO(unsigned long caller, u8 width, const volatile void __iomem *addr), + TP_PROTO(unsigned long caller, unsigned long caller0, u8 width, + const volatile void __iomem *addr), - TP_ARGS(caller, width, addr), + TP_ARGS(caller, caller0, width, addr), TP_STRUCT__entry( __field(unsigned long, caller) + __field(unsigned long, caller0) __field(unsigned long, addr) __field(u8, width) ), TP_fast_assign( __entry->caller = caller; + __entry->caller0 = caller0; __entry->addr = (unsigned long)addr; __entry->width = width; ), - TP_printk("%pS width=%d addr=%#lx", - (void *)__entry->caller, __entry->width, __entry->addr) + TP_printk("%pS -> %pS width=%d addr=%#lx", + (void *)__entry->caller0, (void *)__entry->caller, __entry->width, __entry->addr) ); TRACE_EVENT(rwmmio_post_read, - TP_PROTO(unsigned long caller, u64 val, u8 width, const volatile void __iomem *addr), + TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width, + const volatile void __iomem *addr), - TP_ARGS(caller, val, width, addr), + TP_ARGS(caller, caller0, val, width, addr), TP_STRUCT__entry( __field(unsigned long, caller) + __field(unsigned long, caller0) __field(unsigned long, addr) __field(u64, val) __field(u8, width) @@ -82,13 +92,14 @@ TRACE_EVENT(rwmmio_post_read, TP_fast_assign( __entry->caller = caller; + __entry->caller0 = caller0; __entry->val = val; __entry->addr = (unsigned long)addr; __entry->width = width; ), - TP_printk("%pS width=%d val=%#llx addr=%#lx", - (void *)__entry->caller, __entry->width, + TP_printk("%pS -> %pS width=%d val=%#llx addr=%#lx", + (void *)__entry->caller0, (void *)__entry->caller, __entry->width, __entry->val, __entry->addr) ); diff --git a/lib/trace_readwrite.c b/lib/trace_readwrite.c index 88637038b30c8..62b4e8b3c733b 100644 --- a/lib/trace_readwrite.c +++ b/lib/trace_readwrite.c @@ -14,33 +14,33 @@ #ifdef CONFIG_TRACE_MMIO_ACCESS void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr) + unsigned long caller_addr, unsigned long caller_addr0) { - trace_rwmmio_write(caller_addr, val, width, addr); + trace_rwmmio_write(caller_addr, caller_addr0, val, width, addr); } EXPORT_SYMBOL_GPL(log_write_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_write); void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, - unsigned long caller_addr) + unsigned long caller_addr, unsigned long caller_addr0) { - trace_rwmmio_post_write(caller_addr, val, width, addr); + trace_rwmmio_post_write(caller_addr, caller_addr0, val, width, addr); } EXPORT_SYMBOL_GPL(log_post_write_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_post_write); void log_read_mmio(u8 width, const volatile void __iomem *addr, - unsigned long caller_addr) + unsigned long caller_addr, unsigned long caller_addr0) { - trace_rwmmio_read(caller_addr, width, addr); + trace_rwmmio_read(caller_addr, caller_addr0, width, addr); } EXPORT_SYMBOL_GPL(log_read_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_read); void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, - unsigned long caller_addr) + unsigned long caller_addr, unsigned long caller_addr0) { - trace_rwmmio_post_read(caller_addr, val, width, addr); + trace_rwmmio_post_read(caller_addr, caller_addr0, val, width, addr); } EXPORT_SYMBOL_GPL(log_post_read_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_post_read); -- GitLab From ec62b4424174f41bdcedd08d12d7bed80088453d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: 
Tue, 22 Nov 2022 08:29:43 +0800 Subject: [PATCH 299/694] iommu/vt-d: Allocate pasid table in device probe path Whether or not a domain is attached to the device, the pasid table should always be valid as long as it has been probed. This moves the pasid table allocation from the domain attaching device path to device probe path and frees it in the device release path. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 996a8b5ee5ee6..6b8a24f68da8d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2475,13 +2475,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) /* PASID table is mandatory for a PCI device in scalable mode. */ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { - ret = intel_pasid_alloc_table(dev); - if (ret) { - dev_err(dev, "PASID table allocation failed\n"); - dmar_remove_one_dev_info(dev); - return ret; - } - /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, @@ -4106,7 +4099,6 @@ static void dmar_remove_one_dev_info(struct device *dev) iommu_disable_dev_iotlb(info); domain_context_clear(info); - intel_pasid_free_table(info->dev); } spin_lock_irqsave(&domain->lock, flags); @@ -4466,6 +4458,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) struct device_domain_info *info; struct intel_iommu *iommu; u8 bus, devfn; + int ret; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu || !iommu->iommu.ops) @@ -4509,6 +4502,16 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dev_iommu_priv_set(dev, info); + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { + ret = intel_pasid_alloc_table(dev); + if (ret) { + dev_err(dev, "PASID table allocation failed\n"); + dev_iommu_priv_set(dev, NULL); + kfree(info); + return ERR_PTR(ret); + } + } + return &iommu->iommu; } @@ -4517,6 +4520,7 @@ static void intel_iommu_release_device(struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); dmar_remove_one_dev_info(dev); + intel_pasid_free_table(dev); dev_iommu_priv_set(dev, NULL); kfree(info); set_dma_ops(dev, NULL); -- GitLab From c7be17c2903d4acbf9aa372bfb6e2a418387fce0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:44 +0800 Subject: [PATCH 300/694] iommu/vt-d: Add device_block_translation() helper If domain attaching to device fails, the IOMMU driver should bring the device to blocking DMA state. The upper layer is expected to recover it by attaching a new domain. Use device_block_translation() in the error path of dev_attach to make the behavior specific. The difference between device_block_translation() and the previous dmar_remove_one_dev_info() is that, in the scalable mode, it is the RID2PASID entry instead of context entry being cleared. As a result, enabling PCI capabilities is moved up. 
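
A minimal user-space sketch of the error-handling pattern described above: if any step of an attach sequence fails, the device is driven into an explicit "blocked" state instead of being torn down wholesale. All names below (demo_dev, attach_domain, block_translation) are invented for illustration and only model the control flow, not the real VT-d code.

#include <stdio.h>
#include <stdbool.h>

enum dev_state { DEV_BLOCKED, DEV_ATTACHED };

struct demo_dev {
	enum dev_state state;
	int domain_id;		/* valid only while DEV_ATTACHED */
};

/* Clear translation state so all DMA from the device is rejected. */
static void block_translation(struct demo_dev *dev)
{
	dev->domain_id = -1;
	dev->state = DEV_BLOCKED;
}

/* Pretend the second programming step can fail. */
static int program_hw(struct demo_dev *dev, int step, bool fail_step2)
{
	(void)dev;
	return (step == 2 && fail_step2) ? -1 : 0;
}

static int attach_domain(struct demo_dev *dev, int domain_id, bool fail_step2)
{
	dev->domain_id = domain_id;

	if (program_hw(dev, 1, fail_step2) || program_hw(dev, 2, fail_step2)) {
		/* On any failure, fall back to the known-safe blocked state. */
		block_translation(dev);
		return -1;
	}

	dev->state = DEV_ATTACHED;
	return 0;
}

int main(void)
{
	struct demo_dev dev = { .state = DEV_BLOCKED, .domain_id = -1 };

	if (attach_domain(&dev, 7, true))
		printf("attach failed, device left blocked (state=%d)\n", dev.state);

	if (!attach_domain(&dev, 7, false))
		printf("attach ok, domain=%d\n", dev.domain_id);

	return 0;
}
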
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 44 ++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6b8a24f68da8d..6aafb86ef5c34 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -277,7 +277,7 @@ static LIST_HEAD(dmar_satc_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) -static void dmar_remove_one_dev_info(struct device *dev); +static void device_block_translation(struct device *dev); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); @@ -1400,7 +1400,7 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; - if (!info || !dev_is_pci(info->dev)) + if (!dev_is_pci(info->dev)) return; pdev = to_pci_dev(info->dev); @@ -2045,7 +2045,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } else { iommu_flush_write_buffer(iommu); } - iommu_enable_pci_caps(info); ret = 0; @@ -2487,7 +2486,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) dev, PASID_RID2PASID); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); - dmar_remove_one_dev_info(dev); + device_block_translation(dev); return ret; } } @@ -2495,10 +2494,12 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) ret = domain_context_mapping(domain, dev); if (ret) { dev_err(dev, "Domain context map failed\n"); - dmar_remove_one_dev_info(dev); + device_block_translation(dev); return ret; } + iommu_enable_pci_caps(info); + return 0; } @@ -4109,6 +4110,37 @@ static void dmar_remove_one_dev_info(struct device *dev) info->domain = NULL; } +/* + * Clear the page table pointer in context or pasid table entries so that + * all DMA requests without PASID from the device are blocked. If the page + * table has been set, clean up the data structures. + */ +static void device_block_translation(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + unsigned long flags; + + iommu_disable_dev_iotlb(info); + if (!dev_is_real_dma_subdevice(dev)) { + if (sm_supported(iommu)) + intel_pasid_tear_down_entry(iommu, dev, + PASID_RID2PASID, false); + else + domain_context_clear(info); + } + + if (!info->domain) + return; + + spin_lock_irqsave(&info->domain->lock, flags); + list_del(&info->link); + spin_unlock_irqrestore(&info->domain->lock, flags); + + domain_detach_iommu(info->domain, iommu); + info->domain = NULL; +} + static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; @@ -4232,7 +4264,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct device_domain_info *info = dev_iommu_priv_get(dev); if (info->domain) - dmar_remove_one_dev_info(dev); + device_block_translation(dev); } ret = prepare_domain_attach_device(domain, dev); -- GitLab From 35a99c54dd60103930db4a472dd15f232e754867 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:45 +0800 Subject: [PATCH 301/694] iommu/vt-d: Add blocking domain support The Intel IOMMU hardwares support blocking DMA transactions by clearing the translation table entries. This implements a real blocking domain to avoid using an empty UNMANAGED domain. 
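
As a rough illustration of that idea, a blocking domain can be modelled as a single statically allocated object whose attach callback simply disables translation for the device, so nothing is allocated or freed per attach. The types and names in this sketch are invented and do not mirror the driver's real structures.

#include <stdio.h>

struct demo_device {
	const char *name;
	int translation_enabled;
};

struct demo_domain_ops {
	int (*attach_dev)(struct demo_device *dev);
};

struct demo_domain {
	const struct demo_domain_ops *ops;
};

static int blocking_attach_dev(struct demo_device *dev)
{
	/* "Attaching" the blocking domain just turns translation off. */
	dev->translation_enabled = 0;
	return 0;
}

/* One shared, statically allocated blocking domain for every device. */
static const struct demo_domain_ops blocking_ops = {
	.attach_dev = blocking_attach_dev,
};
static struct demo_domain blocking_domain = { .ops = &blocking_ops };

int main(void)
{
	struct demo_device dev = { "dev0", 1 };

	blocking_domain.ops->attach_dev(&dev);
	printf("%s translation_enabled=%d\n", dev.name, dev.translation_enabled);
	return 0;
}
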
The detach_dev callback of the domain ops is not used in any path. Remove it to avoid dead code as well. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6aafb86ef5c34..25c772e8106f3 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -278,6 +278,7 @@ static LIST_HEAD(dmar_satc_units); list_for_each_entry(rmrr, &dmar_rmrr_units, list) static void device_block_translation(struct device *dev); +static void intel_iommu_domain_free(struct iommu_domain *domain); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); @@ -4162,12 +4163,28 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) return 0; } +static int blocking_domain_attach_dev(struct iommu_domain *domain, + struct device *dev) +{ + device_block_translation(dev); + return 0; +} + +static struct iommu_domain blocking_domain = { + .ops = &(const struct iommu_domain_ops) { + .attach_dev = blocking_domain_attach_dev, + .free = intel_iommu_domain_free + } +}; + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; switch (type) { + case IOMMU_DOMAIN_BLOCKED: + return &blocking_domain; case IOMMU_DOMAIN_DMA: case IOMMU_DOMAIN_DMA_FQ: case IOMMU_DOMAIN_UNMANAGED: @@ -4200,7 +4217,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) static void intel_iommu_domain_free(struct iommu_domain *domain) { - if (domain != &si_domain->domain) + if (domain != &si_domain->domain && domain != &blocking_domain) domain_exit(to_dmar_domain(domain)); } @@ -4274,12 +4291,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return domain_add_dev_info(to_dmar_domain(domain), dev); } -static void intel_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - dmar_remove_one_dev_info(dev); -} - static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -4767,7 +4778,6 @@ const struct iommu_ops intel_iommu_ops = { #endif .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = intel_iommu_attach_device, - .detach_dev = intel_iommu_detach_device, .map_pages = intel_iommu_map_pages, .unmap_pages = intel_iommu_unmap_pages, .iotlb_sync_map = intel_iommu_iotlb_sync_map, -- GitLab From ba502132f5430d66f768569f2af32b8f268322a8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:46 +0800 Subject: [PATCH 302/694] iommu/vt-d: Rename iommu_disable_dev_iotlb() Rename iommu_disable_dev_iotlb() to iommu_disable_pci_caps() to pair with iommu_enable_pci_caps(). 
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 25c772e8106f3..a5885665ccef1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1441,7 +1441,7 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) } } -static void iommu_disable_dev_iotlb(struct device_domain_info *info) +static void iommu_disable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -4099,7 +4099,7 @@ static void dmar_remove_one_dev_info(struct device *dev) intel_pasid_tear_down_entry(iommu, info->dev, PASID_RID2PASID, false); - iommu_disable_dev_iotlb(info); + iommu_disable_pci_caps(info); domain_context_clear(info); } @@ -4122,7 +4122,7 @@ static void device_block_translation(struct device *dev) struct intel_iommu *iommu = info->iommu; unsigned long flags; - iommu_disable_dev_iotlb(info); + iommu_disable_pci_caps(info); if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, dev, -- GitLab From a8204479f284a9d21c22e2fd7c9f7564b5828553 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:47 +0800 Subject: [PATCH 303/694] iommu/vt-d: Rename domain_add_dev_info() dmar_domain_attach_device() is more meaningful according to what this helper does. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a5885665ccef1..3bd79ae238f2d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2453,7 +2453,8 @@ static int __init si_domain_init(int hw) return 0; } -static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) +static int dmar_domain_attach_device(struct dmar_domain *domain, + struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; @@ -4288,7 +4289,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, if (ret) return ret; - return domain_add_dev_info(to_dmar_domain(domain), dev); + return dmar_domain_attach_device(to_dmar_domain(domain), dev); } static int intel_iommu_map(struct iommu_domain *domain, -- GitLab From b1cf1563f3b7396a2cb76b12b3bcdd7046b46372 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:48 +0800 Subject: [PATCH 304/694] iommu/vt-d: Remove unnecessary domain_context_mapped() The device_domain_info::domain accurately records the domain attached to the device. It is unnecessary to check whether the context is present in the attach_dev path. Remove it to make the code neat. 
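
A small sketch of the bookkeeping argument above, with invented names: once the per-device info structure reliably records the attached domain, the attach path can branch on that pointer directly instead of re-probing hardware state. Only the control flow is modelled here.

#include <stdio.h>
#include <stddef.h>

struct demo_domain { int id; };

struct demo_dev_info {
	/* Single source of truth: non-NULL iff a domain is attached. */
	struct demo_domain *domain;
};

static void block_translation(struct demo_dev_info *info)
{
	printf("blocking previous domain %d\n", info->domain->id);
	info->domain = NULL;
}

static int attach_device(struct demo_dev_info *info, struct demo_domain *new_dom)
{
	/* No hardware probe needed: the cached pointer already tells us. */
	if (info->domain)
		block_translation(info);

	info->domain = new_dom;
	printf("attached domain %d\n", new_dom->id);
	return 0;
}

int main(void)
{
	struct demo_domain a = { 1 }, b = { 2 };
	struct demo_dev_info info = { NULL };

	attach_device(&info, &a);
	attach_device(&info, &b);	/* triggers the block-then-attach path */
	return 0;
}
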
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 47 +++---------------------------------- 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 3bd79ae238f2d..3b37f1b3b6deb 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -780,19 +780,6 @@ static void domain_flush_cache(struct dmar_domain *domain, clflush_cache_range(addr, size); } -static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) -{ - struct context_entry *context; - int ret = 0; - - spin_lock(&iommu->lock); - context = iommu_context_addr(iommu, bus, devfn, 0); - if (context) - ret = context_present(context); - spin_unlock(&iommu->lock); - return ret; -} - static void free_context_table(struct intel_iommu *iommu) { struct context_entry *context; @@ -2097,30 +2084,6 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) &domain_context_mapping_cb, &data); } -static int domain_context_mapped_cb(struct pci_dev *pdev, - u16 alias, void *opaque) -{ - struct intel_iommu *iommu = opaque; - - return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff); -} - -static int domain_context_mapped(struct device *dev) -{ - struct intel_iommu *iommu; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return -ENODEV; - - if (!dev_is_pci(dev)) - return device_context_mapped(iommu, bus, devfn); - - return !pci_for_each_dma_alias(to_pci_dev(dev), - domain_context_mapped_cb, iommu); -} - /* Returns a number of VTD pages, but aligned to MM page size */ static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size) @@ -4269,6 +4232,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; if (domain->type == IOMMU_DOMAIN_UNMANAGED && @@ -4277,13 +4241,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EPERM; } - /* normally dev is not mapped */ - if (unlikely(domain_context_mapped(dev))) { - struct device_domain_info *info = dev_iommu_priv_get(dev); - - if (info->domain) - device_block_translation(dev); - } + if (info->domain) + device_block_translation(dev); ret = prepare_domain_attach_device(domain, dev); if (ret) -- GitLab From e5b0feb4361a4830b9133f57ed13923d70409b69 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 22 Nov 2022 08:29:49 +0800 Subject: [PATCH 305/694] iommu/vt-d: Use real field for indication of first level The dmar_domain uses bit field members to indicate the behaviors. Add a bit field for using first level and remove the flags member to avoid duplication. 
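
A compact illustration of that trade-off, using stand-in types: a dedicated bit-field reads more directly than testing a shared flags word, and dropping the flags member avoids keeping two encodings of the same information. BIT() is open-coded here so the snippet builds outside the kernel.

#include <stdio.h>
#include <stdint.h>

#define DEMO_BIT(n)	(1u << (n))

/* Old style: one flags word shared by several unrelated indications. */
#define DEMO_FLAG_USE_FIRST_LEVEL	DEMO_BIT(1)
struct demo_domain_old {
	unsigned int flags;
};

/* New style: each behaviour gets its own named bit-field. */
struct demo_domain_new {
	uint8_t use_first_level:1;
	uint8_t force_snooping:1;
};

int main(void)
{
	struct demo_domain_old o = { .flags = DEMO_FLAG_USE_FIRST_LEVEL };
	struct demo_domain_new n = { .use_first_level = 1 };

	printf("old: %d  new: %d\n",
	       !!(o.flags & DEMO_FLAG_USE_FIRST_LEVEL), n.use_first_level);
	return 0;
}
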
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221118132451.114406-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 25 ++++++++++--------------- drivers/iommu/intel/iommu.h | 15 +++++---------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 3b37f1b3b6deb..a3db7ac3d60c1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -383,11 +383,6 @@ static inline int domain_type_is_si(struct dmar_domain *domain) return domain->domain.type == IOMMU_DOMAIN_IDENTITY; } -static inline bool domain_use_first_level(struct dmar_domain *domain) -{ - return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL; -} - static inline int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { @@ -501,7 +496,7 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain, rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (iommu != skip) { - if (domain && domain_use_first_level(domain)) { + if (domain && domain->use_first_level) { if (!cap_fl1gp_support(iommu->cap)) mask = 0x1; } else { @@ -579,7 +574,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) * paging and 57-bits with 5-level paging). Hence, skip bit * [N-1]. */ - if (domain_use_first_level(domain)) + if (domain->use_first_level) domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); else domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); @@ -947,7 +942,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; - if (domain_use_first_level(domain)) + if (domain->use_first_level) pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (cmpxchg64(&pte->val, 0ULL, pteval)) @@ -1498,7 +1493,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (ih) ih = 1 << 6; - if (domain_use_first_level(domain)) { + if (domain->use_first_level) { qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); } else { unsigned long bitmask = aligned_pages - 1; @@ -1552,7 +1547,7 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu, * It's a non-present to present mapping. Only flush if caching mode * and second level. 
*/ - if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain)) + if (cap_caching_mode(iommu->cap) && !domain->use_first_level) iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1); else iommu_flush_write_buffer(iommu); @@ -1568,7 +1563,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) struct intel_iommu *iommu = info->iommu; u16 did = domain_id_iommu(dmar_domain, iommu); - if (domain_use_first_level(dmar_domain)) + if (dmar_domain->use_first_level) qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, @@ -1741,7 +1736,7 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->nid = NUMA_NO_NODE; if (first_level_by_default(type)) - domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; + domain->use_first_level = true; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); spin_lock_init(&domain->lock); @@ -2173,7 +2168,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); attr |= DMA_FL_PTE_PRESENT; - if (domain_use_first_level(domain)) { + if (domain->use_first_level) { attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (prot & DMA_PTE_WRITE) attr |= DMA_FL_PTE_DIRTY; @@ -2443,7 +2438,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); - else if (domain_use_first_level(domain)) + else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, PASID_RID2PASID); else @@ -4412,7 +4407,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain) * Second level page table supports per-PTE snoop control. The * iommu_map() interface will handle this by setting SNP bit. */ - if (!domain_use_first_level(domain)) { + if (!domain->use_first_level) { domain->set_pte_snp = true; return; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513a..30b0d72aeb6c4 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -517,14 +517,6 @@ struct context_entry { u64 hi; }; -/* - * When VT-d works in the scalable mode, it allows DMA translation to - * happen through either first level or second level page table. This - * bit marks that the DMA translation for the domain goes through the - * first level page table, otherwise, it goes through the second level. - */ -#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) - struct iommu_domain_info { struct intel_iommu *iommu; unsigned int refcnt; /* Refcount of devices per iommu */ @@ -541,6 +533,11 @@ struct dmar_domain { u8 iommu_coherency: 1; /* indicate coherency of iommu access */ u8 force_snooping : 1; /* Create IOPTEs with snoop control */ u8 set_pte_snp:1; + u8 use_first_level:1; /* DMA translation for the domain goes + * through the first level page table, + * otherwise, goes through the second + * level. 
+ */ spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ @@ -550,8 +547,6 @@ struct dmar_domain { /* adjusted guest address width, 0 is level 2 30-bit */ int agaw; - - int flags; /* flags to find out type of domain */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ -- GitLab From a2430b25c31840a6dcbf95c65415d5fee2984dbc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 19 Nov 2022 04:15:50 +0900 Subject: [PATCH 306/694] kbuild: add kbuild-file macro While building, installing, cleaning, Kbuild visits sub-directories and includes 'Kbuild' or 'Makefile' that exists there. Add 'kbuild-file' macro, and reuse it from scripts/Makefie.* Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier Reviewed-by: Alexander Lobakin Tested-by: Alexander Lobakin --- scripts/Kbuild.include | 5 +++++ scripts/Makefile.asm-generic | 6 +++--- scripts/Makefile.build | 6 +----- scripts/Makefile.clean | 5 +---- scripts/Makefile.dtbinst | 2 +- scripts/Makefile.modpost | 2 +- 6 files changed, 12 insertions(+), 14 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 2bc08ace38a3b..cbe28744637b7 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -40,6 +40,11 @@ escsq = $(subst $(squote),'\$(squote)',$1) # Quote a string to pass it to C files. foo => '"foo"' stringify = $(squote)$(quote)$1$(quote)$(squote) +### +# The path to Kbuild or Makefile. Kbuild has precedence over Makefile. +kbuild-dir = $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) +kbuild-file = $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile) + ### # Easy method for doing a status message kecho := : diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic index 1d501c57f9eff..8d01b37b76775 100644 --- a/scripts/Makefile.asm-generic +++ b/scripts/Makefile.asm-generic @@ -10,15 +10,15 @@ PHONY := all all: src := $(subst /generated,,$(obj)) --include $(src)/Kbuild + +include $(srctree)/scripts/Kbuild.include +-include $(kbuild-file) # $(generic)/Kbuild lists mandatory-y. Exclude um since it is a special case. ifneq ($(SRCARCH),um) include $(srctree)/$(generic)/Kbuild endif -include $(srctree)/scripts/Kbuild.include - redundant := $(filter $(mandatory-y) $(generated-y), $(generic-y)) redundant += $(foreach f, $(generic-y), $(if $(wildcard $(srctree)/$(src)/$(f)),$(f))) redundant := $(sort $(redundant)) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 41f3602fc8de7..37cf88d076e87 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -38,11 +38,7 @@ subdir-ccflags-y := include $(srctree)/scripts/Kbuild.include include $(srctree)/scripts/Makefile.compiler - -# The filename Kbuild has precedence over Makefile -kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) -include $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile) - +include $(kbuild-file) include $(srctree)/scripts/Makefile.lib # Do not include hostprogs rules unless needed. 
diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index 878cec6489595..3649900696ddd 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -9,10 +9,7 @@ PHONY := __clean __clean: include $(srctree)/scripts/Kbuild.include - -# The filename Kbuild has precedence over Makefile -kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) -include $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile) +include $(kbuild-file) # Figure out what we need to build from the various variables # ========================================================================== diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst index 190d781e84f4b..2ab936e4179da 100644 --- a/scripts/Makefile.dtbinst +++ b/scripts/Makefile.dtbinst @@ -15,7 +15,7 @@ __dtbs_install: include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -include $(src)/Makefile +include $(kbuild-file) dtbs := $(addprefix $(dst)/, $(dtb-y) $(if $(CONFIG_OF_ALL_DTBS),$(dtb-))) subdirs := $(addprefix $(obj)/, $(subdir-y) $(subdir-m)) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index e41dee64d429c..55a72f5eb76dd 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -93,7 +93,7 @@ obj := $(KBUILD_EXTMOD) src := $(obj) # Include the module's Makefile to find KBUILD_EXTRA_SYMBOLS -include $(or $(wildcard $(src)/Kbuild), $(src)/Makefile) +include $(kbuild-file) module.symvers-if-present := $(wildcard Module.symvers) output-symdump := $(KBUILD_EXTMOD)/Module.symvers -- GitLab From 598afa050403ddbb015ad4d9f8e6b911c3c93d33 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 19 Nov 2022 04:15:51 +0900 Subject: [PATCH 307/694] kbuild: warn objects shared among multiple modules If an object is shared among multiple modules, and some of them are configured as 'm', but the others as 'y', the shared object is built as modular, then linked to the modules and vmlinux. This is a potential issue because the expected CFLAGS are different between modules and builtins. Commit 637a642f5ca5 ("zstd: Fixing mixed module-builtin objects") reported that this could be even more fatal in some cases such as Clang LTO. That commit fixed lib/zlib/zstd_{compress,decompress}, but there are still more instances of breakage. This commit adds a W=1 warning for shared objects, so that the kbuild test robot, which provides build tests with W=1, will avoid a new breakage slipping in. 
Quick compile tests on v6.1-rc4 detected the following: scripts/Makefile.build:252: ./drivers/block/rnbd/Makefile: rnbd-common.o is added to multiple modules: rnbd-client rnbd-server scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: cn10k_cpt.o is added to multiple modules: rvu_cptpf rvu_cptvf scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: otx2_cptlf.o is added to multiple modules: rvu_cptpf rvu_cptvf scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: otx2_cpt_mbox_common.o is added to multiple modules: rvu_cptpf rvu_cptvf scripts/Makefile.build:252: ./drivers/edac/Makefile: skx_common.o is added to multiple modules: i10nm_edac skx_edac scripts/Makefile.build:252: ./drivers/gpu/drm/bridge/imx/Makefile: imx-ldb-helper.o is added to multiple modules: imx8qm-ldb imx8qxp-ldb scripts/Makefile.build:252: ./drivers/mfd/Makefile: rsmu_core.o is added to multiple modules: rsmu-i2c rsmu-spi scripts/Makefile.build:252: ./drivers/mtd/tests/Makefile: mtd_test.o is added to multiple modules: mtd_nandbiterrs mtd_oobtest mtd_pagetest mtd_readtest mtd_speedtest mtd_stresstest mtd_subpagetest mtd_torturetest scripts/Makefile.build:252: ./drivers/net/dsa/ocelot/Makefile: felix.o is added to multiple modules: mscc_felix mscc_seville scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn23xx_pf_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn23xx_vf_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn66xx_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn68xx_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: lio_core.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: lio_ethtool.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_device.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_droq.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_mailbox.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_mem_ops.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_nic.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: request_manager.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: response_manager.o is added to multiple modules: liquidio liquidio_vf scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/dpaa2/Makefile: dpaa2-mac.o is added to multiple modules: fsl-dpaa2-eth fsl-dpaa2-switch scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/dpaa2/Makefile: dpmac.o is added to multiple modules: fsl-dpaa2-eth fsl-dpaa2-switch scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: 
enetc_cbdr.o is added to multiple modules: fsl-enetc fsl-enetc-vf scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: enetc_ethtool.o is added to multiple modules: fsl-enetc fsl-enetc-vf scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: enetc.o is added to multiple modules: fsl-enetc fsl-enetc-vf scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_cmd.o is added to multiple modules: hclge hclgevf scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_rss.o is added to multiple modules: hclge hclgevf scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_tqp_stats.o is added to multiple modules: hclge hclgevf scripts/Makefile.build:252: ./drivers/net/ethernet/marvell/octeontx2/nic/Makefile: otx2_dcbnl.o is added to multiple modules: rvu_nicpf rvu_nicvf scripts/Makefile.build:252: ./drivers/net/ethernet/marvell/octeontx2/nic/Makefile: otx2_devlink.o is added to multiple modules: rvu_nicpf rvu_nicvf scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_ale.o is added to multiple modules: keystone_netcp keystone_netcp_ethss ti_cpsw ti_cpsw_new scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_ethtool.o is added to multiple modules: ti_cpsw ti_cpsw_new scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_priv.o is added to multiple modules: ti_cpsw ti_cpsw_new scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_sl.o is added to multiple modules: ti_cpsw ti_cpsw_new scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: davinci_cpdma.o is added to multiple modules: ti_cpsw ti_cpsw_new ti_davinci_emac scripts/Makefile.build:252: ./drivers/platform/x86/intel/int3472/Makefile: common.o is added to multiple modules: intel_skl_int3472_discrete intel_skl_int3472_tps68470 scripts/Makefile.build:252: ./sound/soc/codecs/Makefile: wcd-clsh-v2.o is added to multiple modules: snd-soc-wcd9335 snd-soc-wcd934x snd-soc-wcd938x Once all the warnings are fixed, it can become an error without the W= option. 
Signed-off-by: Masahiro Yamada Reviewed-by: Alexander Lobakin Tested-by: Alexander Lobakin Reviewed-by: Nicolas Schier --- scripts/Makefile.build | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 37cf88d076e87..799df12b53f39 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -222,6 +222,10 @@ endif cmd_check_local_export = $(srctree)/scripts/check-local-export $@ +ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) +cmd_warn_shared_object = $(if $(word 2, $(modname-multi)),$(warning $(kbuild-file): $*.o is added to multiple modules: $(modname-multi))) +endif + define rule_cc_o_c $(call cmd_and_fixdep,cc_o_c) $(call cmd,gen_ksymdeps) @@ -231,6 +235,7 @@ define rule_cc_o_c $(call cmd,gen_objtooldep) $(call cmd,gen_symversions_c) $(call cmd,record_mcount) + $(call cmd,warn_shared_object) endef define rule_as_o_S @@ -239,6 +244,7 @@ define rule_as_o_S $(call cmd,check_local_export) $(call cmd,gen_objtooldep) $(call cmd,gen_symversions_S) + $(call cmd,warn_shared_object) endef # Built-in and composite module parts -- GitLab From 73f5fc5f884ad0c5f7d57f66303af64f9f002526 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 21 Nov 2022 08:20:22 +0000 Subject: [PATCH 308/694] iommu/fsl_pamu: Fix resource leak in fsl_pamu_probe() The fsl_pamu_probe() returns directly when create_csd() failed, leaving irq and memories unreleased. Fix by jumping to error if create_csd() returns error. Fixes: 695093e38c3e ("iommu/fsl: Freescale PAMU driver and iommu implementation.") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221121082022.19091-1-yuancan@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 1b53d2da2c191..1a8c85d54123f 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -868,7 +868,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) ret = create_csd(ppaact_phys, mem_size, csd_port_id); if (ret) { dev_err(dev, "could not create coherence subdomain\n"); - return ret; + goto error; } } -- GitLab From a39818a3fb2bf12ae945a7c5fba8c5d9048a0e96 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Nov 2022 21:26:10 +1100 Subject: [PATCH 309/694] objtool/powerpc: Implement arch_pc_relative_reloc() Provide an implementation for arch_pc_relative_reloc(). It is needed to pass the build once 61c6065ef7ec ("objtool: Allow !PC relative relocations") is merged. Signed-off-by: Michael Ellerman --- tools/objtool/arch/powerpc/decode.c | 9 +++++++++ tools/objtool/include/objtool/arch.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c index 01cade98b49e8..9c653805a08a9 100644 --- a/tools/objtool/arch/powerpc/decode.c +++ b/tools/objtool/arch/powerpc/decode.c @@ -82,6 +82,15 @@ unsigned long arch_jump_destination(struct instruction *insn) return insn->offset + insn->immediate; } +bool arch_pc_relative_reloc(struct reloc *reloc) +{ + /* + * The powerpc build only allows certain relocation types, see + * relocs_check.sh, and none of those accepted are PC relative. 
+ */ + return false; +} + void arch_initial_func_cfi_state(struct cfi_init_state *state) { int i; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 5149330f400f5..4ecb480131c7c 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -95,4 +95,6 @@ bool arch_is_rethunk(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); +bool arch_pc_relative_reloc(struct reloc *reloc); + #endif /* _ARCH_H */ -- GitLab From b3ad31f33982497dbc7a66a9d3013b1ac6985dfe Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:28 +0800 Subject: [PATCH 310/694] soundwire: intel: start using hw_ops Before introducing new hardware with completely different register spaces and programming sequences, we need to abstract some of the existing routines in hw_ops that will be platform-specific. For now we only use the 'cnl' ops - after the first Intel platform with SoundWire capabilities. Rather than one big intrusive patch, hw_ops are introduced in this patch so show the dependencies between drivers. Follow-up patches will introduce callbacks for debugfs, power and bus management. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 38 +++++++++++++++++++++++------ drivers/soundwire/intel.h | 3 +++ drivers/soundwire/intel_init.c | 1 + include/linux/soundwire/sdw_intel.h | 15 ++++++++++++ sound/soc/sof/intel/hda.c | 2 ++ 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index b9cb7e31ddb35..f88319f8ded49 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -745,10 +745,10 @@ static int intel_free_stream(struct sdw_intel *sdw, * bank switch routines */ -static int intel_pre_bank_switch(struct sdw_bus *bus) +static int intel_pre_bank_switch(struct sdw_intel *sdw) { - struct sdw_cdns *cdns = bus_to_cdns(bus); - struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_cdns *cdns = &sdw->cdns; + struct sdw_bus *bus = &cdns->bus; /* Write to register only for multi-link */ if (!bus->multi_link) @@ -759,10 +759,10 @@ static int intel_pre_bank_switch(struct sdw_bus *bus) return 0; } -static int intel_post_bank_switch(struct sdw_bus *bus) +static int intel_post_bank_switch(struct sdw_intel *sdw) { - struct sdw_cdns *cdns = bus_to_cdns(bus); - struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_cdns *cdns = &sdw->cdns; + struct sdw_bus *bus = &cdns->bus; void __iomem *shim = sdw->link_res->shim; int sync_reg, ret; @@ -1422,6 +1422,28 @@ static int intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) return 0; } +const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { + .pre_bank_switch = intel_pre_bank_switch, + .post_bank_switch = intel_post_bank_switch, +}; +EXPORT_SYMBOL_NS(sdw_intel_cnl_hw_ops, SOUNDWIRE_INTEL); + +static int generic_pre_bank_switch(struct sdw_bus *bus) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + return sdw->link_res->hw_ops->pre_bank_switch(sdw); +} + +static int generic_post_bank_switch(struct sdw_bus *bus) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + return sdw->link_res->hw_ops->post_bank_switch(sdw); +} + static int sdw_master_read_intel_prop(struct sdw_bus *bus) { struct sdw_master_prop *prop = 
&bus->prop; @@ -1477,8 +1499,8 @@ static struct sdw_master_ops sdw_intel_ops = { .xfer_msg_defer = cdns_xfer_msg_defer, .reset_page_addr = cdns_reset_page_addr, .set_bus_conf = cdns_bus_conf, - .pre_bank_switch = intel_pre_bank_switch, - .post_bank_switch = intel_post_bank_switch, + .pre_bank_switch = generic_pre_bank_switch, + .post_bank_switch = generic_post_bank_switch, .read_ping_status = cdns_read_ping_status, }; diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index cd93a44dba9a1..3170df76b4110 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -7,6 +7,7 @@ /** * struct sdw_intel_link_res - Soundwire Intel link resource structure, * typically populated by the controller driver. + * @hw_ops: platform-specific ops * @mmio_base: mmio base of SoundWire registers * @registers: Link IO registers base * @shim: Audio shim pointer @@ -22,6 +23,8 @@ * @list: used to walk-through all masters exposed by the same controller */ struct sdw_intel_link_res { + const struct sdw_intel_hw_ops *hw_ops; + void __iomem *mmio_base; /* not strictly needed, useful for debug */ void __iomem *registers; void __iomem *shim; diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c index d091513919df5..1e6d74b3e7732 100644 --- a/drivers/soundwire/intel_init.c +++ b/drivers/soundwire/intel_init.c @@ -60,6 +60,7 @@ static struct sdw_intel_link_dev *intel_link_dev_register(struct sdw_intel_res * /* Add link information used in the driver probe */ link = &ldev->link_res; + link->hw_ops = res->hw_ops; link->mmio_base = res->mmio_base; link->registers = res->mmio_base + SDW_LINK_BASE + (SDW_LINK_SIZE * link_id); diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 2e9fd91572d4c..2dbe34b41ef14 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -233,6 +233,7 @@ struct sdw_intel_ctx { * struct sdw_intel_res - Soundwire Intel global resource structure, * typically populated by the DSP driver * + * @hw_ops: abstraction for platform ops * @count: link count * @mmio_base: mmio base of SoundWire registers * @irq: interrupt number @@ -249,6 +250,7 @@ struct sdw_intel_ctx { * @alh_base: sdw alh base. */ struct sdw_intel_res { + const struct sdw_intel_hw_ops *hw_ops; int count; void __iomem *mmio_base; int irq; @@ -292,4 +294,17 @@ irqreturn_t sdw_intel_thread(int irq, void *dev_id); #define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1) +struct sdw_intel; + +/* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. 
+ * @pre_bank_switch: helper for bus management + * @post_bank_switch: helper for bus management + */ +struct sdw_intel_hw_ops { + int (*pre_bank_switch)(struct sdw_intel *sdw); + int (*post_bank_switch)(struct sdw_intel *sdw); +}; + +extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops; + #endif diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 1188ec51816bd..3d6254489056a 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -188,6 +188,7 @@ static int hda_sdw_probe(struct snd_sof_dev *sdev) memset(&res, 0, sizeof(res)); + res.hw_ops = &sdw_intel_cnl_hw_ops; res.mmio_base = sdev->bar[HDA_DSP_BAR]; res.shim_base = hdev->desc->sdw_shim_base; res.alh_base = hdev->desc->sdw_alh_base; @@ -1694,3 +1695,4 @@ MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915); MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA); MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI); MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT); +MODULE_IMPORT_NS(SOUNDWIRE_INTEL); -- GitLab From fb2dc6a0a5f885233d632b1e92be9c0be977b0dc Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:29 +0800 Subject: [PATCH 311/694] soundwire: intel: add debugfs callbacks in hw_ops No functionality change, only add indirection for debugfs helpers. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 7 +++++-- drivers/soundwire/intel.h | 16 ++++++++++++++++ include/linux/soundwire/sdw_intel.h | 5 +++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index f88319f8ded49..914f2fb43721a 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1423,6 +1423,9 @@ static int intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) } const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { + .debugfs_init = intel_debugfs_init, + .debugfs_exit = intel_debugfs_exit, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1614,7 +1617,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) goto err_power_up; } - intel_debugfs_init(sdw); + sdw_intel_debugfs_init(sdw); /* start bus */ ret = intel_start_bus(sdw); @@ -1685,7 +1688,7 @@ static void intel_link_remove(struct auxiliary_device *auxdev) * SDW_INTEL_CLK_STOP_NOT_ALLOWED */ if (!bus->prop.hw_disabled) { - intel_debugfs_exit(sdw); + sdw_intel_debugfs_exit(sdw); sdw_cdns_enable_interrupt(cdns, false); } sdw_bus_master_delete(bus); diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 3170df76b4110..5548b8451d014 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -61,4 +61,20 @@ struct sdw_intel_link_dev { #define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \ container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev) +#define SDW_INTEL_CHECK_OPS(sdw, cb) ((sdw) && (sdw)->link_res && (sdw)->link_res->hw_ops && \ + (sdw)->link_res->hw_ops->cb) +#define SDW_INTEL_OPS(sdw, cb) ((sdw)->link_res->hw_ops->cb) + +static inline void sdw_intel_debugfs_init(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, debugfs_init)) + SDW_INTEL_OPS(sdw, debugfs_init)(sdw); +} + +static inline void sdw_intel_debugfs_exit(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, debugfs_exit)) + SDW_INTEL_OPS(sdw, debugfs_exit)(sdw); +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h 
b/include/linux/soundwire/sdw_intel.h index 2dbe34b41ef14..211924e4ebf21 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -297,10 +297,15 @@ irqreturn_t sdw_intel_thread(int irq, void *dev_id); struct sdw_intel; /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. + * @debugfs_init: initialize all debugfs capabilities + * @debugfs_exit: close and cleanup debugfs capabilities * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ struct sdw_intel_hw_ops { + void (*debugfs_init)(struct sdw_intel *sdw); + void (*debugfs_exit)(struct sdw_intel *sdw); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; -- GitLab From b6234bcc6589a0719ec91d810114c0b556a5b88b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:30 +0800 Subject: [PATCH 312/694] soundwire: intel: add register_dai callback in hw_ops No functionality change, only add indirection for DAI registration helper. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 4 +++- drivers/soundwire/intel.h | 7 +++++++ include/linux/soundwire/sdw_intel.h | 3 +++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 914f2fb43721a..0496eb0d60847 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1426,6 +1426,8 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { .debugfs_init = intel_debugfs_init, .debugfs_exit = intel_debugfs_exit, + .register_dai = intel_register_dai, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1611,7 +1613,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) goto err_init; /* Register DAIs */ - ret = intel_register_dai(sdw); + ret = sdw_intel_register_dai(sdw); if (ret) { dev_err(dev, "DAI registration failed: %d\n", ret); goto err_power_up; diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 5548b8451d014..0521cab311a33 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -77,4 +77,11 @@ static inline void sdw_intel_debugfs_exit(struct sdw_intel *sdw) SDW_INTEL_OPS(sdw, debugfs_exit)(sdw); } +static inline int sdw_intel_register_dai(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, register_dai)) + return SDW_INTEL_OPS(sdw, register_dai)(sdw); + return -ENOTSUPP; +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 211924e4ebf21..5be63d4fe62ec 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -299,6 +299,7 @@ struct sdw_intel; /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. 
* @debugfs_init: initialize all debugfs capabilities * @debugfs_exit: close and cleanup debugfs capabilities + * @register_dai: read all PDI information and register DAIs * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ @@ -306,6 +307,8 @@ struct sdw_intel_hw_ops { void (*debugfs_init)(struct sdw_intel *sdw); void (*debugfs_exit)(struct sdw_intel *sdw); + int (*register_dai)(struct sdw_intel *sdw); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; -- GitLab From 3db0c5a6a2832c7b4b40676299e4bbbe1a96bc8b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:31 +0800 Subject: [PATCH 313/694] soundwire: intel: add bus management callbacks in hw_ops No functionality change, only add indirection for bus management helpers. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-5-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 24 ++++++++++++-------- drivers/soundwire/intel.h | 34 +++++++++++++++++++++++++++++ include/linux/soundwire/sdw_intel.h | 11 ++++++++++ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 0496eb0d60847..6d2fdf3a01fd8 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1428,6 +1428,12 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { .register_dai = intel_register_dai, + .check_clock_stop = intel_check_clock_stop, + .start_bus = intel_start_bus, + .start_bus_after_reset = intel_start_bus_after_reset, + .start_bus_after_clock_stop = intel_start_bus_after_clock_stop, + .stop_bus = intel_stop_bus, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1622,7 +1628,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) sdw_intel_debugfs_init(sdw); /* start bus */ - ret = intel_start_bus(sdw); + ret = sdw_intel_start_bus(sdw); if (ret) { dev_err(dev, "bus start failed: %d\n", ret); goto err_power_up; @@ -1850,7 +1856,7 @@ static int __maybe_unused intel_suspend(struct device *dev) return 0; } - ret = intel_stop_bus(sdw, false); + ret = sdw_intel_stop_bus(sdw, false); if (ret < 0) { dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret); return ret; @@ -1876,14 +1882,14 @@ static int __maybe_unused intel_suspend_runtime(struct device *dev) clock_stop_quirks = sdw->link_res->clock_stop_quirks; if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { - ret = intel_stop_bus(sdw, false); + ret = sdw_intel_stop_bus(sdw, false); if (ret < 0) { dev_err(dev, "%s: cannot stop bus during teardown: %d\n", __func__, ret); return ret; } } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) { - ret = intel_stop_bus(sdw, true); + ret = sdw_intel_stop_bus(sdw, true); if (ret < 0) { dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n", __func__, ret); @@ -1941,7 +1947,7 @@ static int __maybe_unused intel_resume(struct device *dev) */ sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); - ret = intel_start_bus(sdw); + ret = sdw_intel_start_bus(sdw); if (ret < 0) { dev_err(dev, "cannot start bus during resume\n"); intel_link_power_down(sdw); @@ -1995,7 +2001,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) */ sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); - ret = intel_start_bus(sdw); + ret = sdw_intel_start_bus(sdw); if 
(ret < 0) { dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret); intel_link_power_down(sdw); @@ -2010,7 +2016,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) return ret; } - ret = intel_start_bus_after_reset(sdw); + ret = sdw_intel_start_bus_after_reset(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret); intel_link_power_down(sdw); @@ -2018,7 +2024,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) } } else if (!clock_stop_quirks) { - intel_check_clock_stop(sdw); + sdw_intel_check_clock_stop(sdw); ret = intel_link_power_up(sdw); if (ret) { @@ -2026,7 +2032,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) return ret; } - ret = intel_start_bus_after_clock_stop(sdw); + ret = sdw_intel_start_bus_after_clock_stop(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret); intel_link_power_down(sdw); diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 0521cab311a33..99a2d875a3316 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -84,4 +84,38 @@ static inline int sdw_intel_register_dai(struct sdw_intel *sdw) return -ENOTSUPP; } +static inline void sdw_intel_check_clock_stop(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, check_clock_stop)) + SDW_INTEL_OPS(sdw, check_clock_stop)(sdw); +} + +static inline int sdw_intel_start_bus(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, start_bus)) + return SDW_INTEL_OPS(sdw, start_bus)(sdw); + return -ENOTSUPP; +} + +static inline int sdw_intel_start_bus_after_reset(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, start_bus_after_reset)) + return SDW_INTEL_OPS(sdw, start_bus_after_reset)(sdw); + return -ENOTSUPP; +} + +static inline int sdw_intel_start_bus_after_clock_stop(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, start_bus_after_clock_stop)) + return SDW_INTEL_OPS(sdw, start_bus_after_clock_stop)(sdw); + return -ENOTSUPP; +} + +static inline int sdw_intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) +{ + if (SDW_INTEL_CHECK_OPS(sdw, stop_bus)) + return SDW_INTEL_OPS(sdw, stop_bus)(sdw, clock_stop); + return -ENOTSUPP; +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 5be63d4fe62ec..cee61bc9af8ad 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -300,6 +300,11 @@ struct sdw_intel; * @debugfs_init: initialize all debugfs capabilities * @debugfs_exit: close and cleanup debugfs capabilities * @register_dai: read all PDI information and register DAIs + * @check_clock_stop: throw error message if clock is not stopped. 
+ * @start_bus: normal start + * @start_bus_after_reset: start after reset + * @start_bus_after_clock_stop: start after mode0 clock stop + * @stop_bus: stop all bus * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ @@ -309,6 +314,12 @@ struct sdw_intel_hw_ops { int (*register_dai)(struct sdw_intel *sdw); + void (*check_clock_stop)(struct sdw_intel *sdw); + int (*start_bus)(struct sdw_intel *sdw); + int (*start_bus_after_reset)(struct sdw_intel *sdw); + int (*start_bus_after_clock_stop)(struct sdw_intel *sdw); + int (*stop_bus)(struct sdw_intel *sdw, bool clock_stop); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; -- GitLab From 49c9ff45991a5a62e040c8b43c89a9ab38a0a91f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:32 +0800 Subject: [PATCH 314/694] soundwire: intel: add link power management callbacks in hw_ops No functionality change, only add indirection for link power management helpers. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-6-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 23 +++++++++++++---------- drivers/soundwire/intel.h | 14 ++++++++++++++ include/linux/soundwire/sdw_intel.h | 5 +++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 6d2fdf3a01fd8..2320f1b8a2d10 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1434,6 +1434,9 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { .start_bus_after_clock_stop = intel_start_bus_after_clock_stop, .stop_bus = intel_stop_bus, + .link_power_up = intel_link_power_up, + .link_power_down = intel_link_power_down, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1614,7 +1617,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) bus->multi_link = multi_link; /* Initialize shim, controller */ - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) goto err_init; @@ -1679,7 +1682,7 @@ int intel_link_startup(struct auxiliary_device *auxdev) return 0; err_power_up: - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); err_init: return ret; } @@ -1935,7 +1938,7 @@ static int __maybe_unused intel_resume(struct device *dev) pm_runtime_idle(dev); } - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s failed: %d\n", __func__, ret); return ret; @@ -1950,7 +1953,7 @@ static int __maybe_unused intel_resume(struct device *dev) ret = sdw_intel_start_bus(sdw); if (ret < 0) { dev_err(dev, "cannot start bus during resume\n"); - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); return ret; } @@ -1989,7 +1992,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) clock_stop_quirks = sdw->link_res->clock_stop_quirks; if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret); return ret; @@ -2004,13 +2007,13 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) ret = sdw_intel_start_bus(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret); - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); return ret; } } else if 
(clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) { - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret); return ret; @@ -2019,14 +2022,14 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) ret = sdw_intel_start_bus_after_reset(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret); - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); return ret; } } else if (!clock_stop_quirks) { sdw_intel_check_clock_stop(sdw); - ret = intel_link_power_up(sdw); + ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s: power_up failed: %d\n", __func__, ret); return ret; @@ -2035,7 +2038,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) ret = sdw_intel_start_bus_after_clock_stop(sdw); if (ret < 0) { dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret); - intel_link_power_down(sdw); + sdw_intel_link_power_down(sdw); return ret; } } else { diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 99a2d875a3316..0f63e75841321 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -118,4 +118,18 @@ static inline int sdw_intel_stop_bus(struct sdw_intel *sdw, bool clock_stop) return -ENOTSUPP; } +static inline int sdw_intel_link_power_up(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, link_power_up)) + return SDW_INTEL_OPS(sdw, link_power_up)(sdw); + return -ENOTSUPP; +} + +static inline int sdw_intel_link_power_down(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, link_power_down)) + return SDW_INTEL_OPS(sdw, link_power_down)(sdw); + return -ENOTSUPP; +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index cee61bc9af8ad..81430201b8b9d 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -305,6 +305,8 @@ struct sdw_intel; * @start_bus_after_reset: start after reset * @start_bus_after_clock_stop: start after mode0 clock stop * @stop_bus: stop all bus + * @link_power_up: power-up using chip-specific helpers + * @link_power_down: power-down with chip-specific helpers * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ @@ -320,6 +322,9 @@ struct sdw_intel_hw_ops { int (*start_bus_after_clock_stop)(struct sdw_intel *sdw); int (*stop_bus)(struct sdw_intel *sdw, bool clock_stop); + int (*link_power_up)(struct sdw_intel *sdw); + int (*link_power_down)(struct sdw_intel *sdw); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; -- GitLab From 36e3b385f35a33a10b792ec46350dd87d79e84dd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:33 +0800 Subject: [PATCH 315/694] soundwire: intel: add in-band wake callbacks in hw_ops No functionality change, only add indirection for in-band wake management helpers. 
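As an illustration of what this indirection enables (a sketch only, not part of the patch; the sdw_intel_hypothetical_hw_ops table and the my_platform_* callbacks are invented names), a platform with different wake handling could plug in its own helpers while reusing the existing ones for everything else:

	static const struct sdw_intel_hw_ops sdw_intel_hypothetical_hw_ops = {
		.register_dai = intel_register_dai,

		.check_clock_stop = intel_check_clock_stop,
		.start_bus = intel_start_bus,
		.start_bus_after_reset = intel_start_bus_after_reset,
		.start_bus_after_clock_stop = intel_start_bus_after_clock_stop,
		.stop_bus = intel_stop_bus,

		.link_power_up = intel_link_power_up,
		.link_power_down = intel_link_power_down,

		/* hypothetical platform-specific wake handling */
		.shim_check_wake = my_platform_shim_check_wake,
		.shim_wake = my_platform_shim_wake,

		.pre_bank_switch = intel_pre_bank_switch,
		.post_bank_switch = intel_post_bank_switch,
	};

Callers never reference the callbacks directly; they go through the sdw_intel_shim_check_wake()/sdw_intel_shim_wake() wrappers added to intel.h, so no caller changes when the ops table does.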
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-7-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 11 +++++++---- drivers/soundwire/intel.h | 13 +++++++++++++ include/linux/soundwire/sdw_intel.h | 5 +++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 2320f1b8a2d10..ea6479b4010de 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1437,6 +1437,9 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { .link_power_up = intel_link_power_up, .link_power_down = intel_link_power_down, + .shim_check_wake = intel_shim_check_wake, + .shim_wake = intel_shim_wake, + .pre_bank_switch = intel_pre_bank_switch, .post_bank_switch = intel_post_bank_switch, }; @@ -1720,11 +1723,11 @@ int intel_link_process_wakeen_event(struct auxiliary_device *auxdev) return 0; } - if (!intel_shim_check_wake(sdw)) + if (!sdw_intel_shim_check_wake(sdw)) return 0; /* disable WAKEEN interrupt ASAP to prevent interrupt flood */ - intel_shim_wake(sdw, false); + sdw_intel_shim_wake(sdw, false); /* * resume the Master, which will generate a bus reset and result in @@ -1852,7 +1855,7 @@ static int __maybe_unused intel_suspend(struct device *dev) */ dev_err(dev, "%s: invalid config: parent is suspended\n", __func__); } else { - intel_shim_wake(sdw, false); + sdw_intel_shim_wake(sdw, false); } } @@ -1987,7 +1990,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev) } /* unconditionally disable WAKEEN interrupt */ - intel_shim_wake(sdw, false); + sdw_intel_shim_wake(sdw, false); clock_stop_quirks = sdw->link_res->clock_stop_quirks; diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 0f63e75841321..9ac3397757a07 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -132,4 +132,17 @@ static inline int sdw_intel_link_power_down(struct sdw_intel *sdw) return -ENOTSUPP; } +static inline int sdw_intel_shim_check_wake(struct sdw_intel *sdw) +{ + if (SDW_INTEL_CHECK_OPS(sdw, shim_check_wake)) + return SDW_INTEL_OPS(sdw, shim_check_wake)(sdw); + return -ENOTSUPP; +} + +static inline void sdw_intel_shim_wake(struct sdw_intel *sdw, bool wake_enable) +{ + if (SDW_INTEL_CHECK_OPS(sdw, shim_wake)) + SDW_INTEL_OPS(sdw, shim_wake)(sdw, wake_enable); +} + #endif /* __SDW_INTEL_LOCAL_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 81430201b8b9d..0942cd4640953 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -307,6 +307,8 @@ struct sdw_intel; * @stop_bus: stop all bus * @link_power_up: power-up using chip-specific helpers * @link_power_down: power-down with chip-specific helpers + * @shim_check_wake: check if a wake was received + * @shim_wake: enable/disable in-band wake management * @pre_bank_switch: helper for bus management * @post_bank_switch: helper for bus management */ @@ -325,6 +327,9 @@ struct sdw_intel_hw_ops { int (*link_power_up)(struct sdw_intel *sdw); int (*link_power_down)(struct sdw_intel *sdw); + int (*shim_check_wake)(struct sdw_intel *sdw); + void (*shim_wake)(struct sdw_intel *sdw, bool wake_enable); + int (*pre_bank_switch)(struct sdw_intel *sdw); int (*post_bank_switch)(struct sdw_intel *sdw); }; -- GitLab From 7cbf00bd4142cd88ac7ecbc4ea7b917a220cb721 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 11 Nov 2022 09:31:34 +0800 Subject: 
[PATCH 316/694] soundwire: intel: split auxdevice to different file The auxdevice layer is completely generic, it should be split from intel.c which is only geared to the 'cnl' hw_ops now. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221111013135.38289-8-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/Makefile | 2 +- drivers/soundwire/intel.c | 657 --------------------------- drivers/soundwire/intel.h | 11 +- drivers/soundwire/intel_auxdevice.c | 678 ++++++++++++++++++++++++++++ drivers/soundwire/intel_auxdevice.h | 18 + drivers/soundwire/intel_init.c | 1 + 6 files changed, 700 insertions(+), 667 deletions(-) create mode 100644 drivers/soundwire/intel_auxdevice.c create mode 100644 drivers/soundwire/intel_auxdevice.h diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile index 986776787b9ea..ca97414ada705 100644 --- a/drivers/soundwire/Makefile +++ b/drivers/soundwire/Makefile @@ -20,7 +20,7 @@ soundwire-cadence-y := cadence_master.o obj-$(CONFIG_SOUNDWIRE_CADENCE) += soundwire-cadence.o #Intel driver -soundwire-intel-y := intel.o intel_init.o dmi-quirks.o +soundwire-intel-y := intel.o intel_auxdevice.o intel_init.o dmi-quirks.o obj-$(CONFIG_SOUNDWIRE_INTEL) += soundwire-intel.o #Qualcomm driver diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index ea6479b4010de..bc9c50bacc494 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -8,10 +8,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -22,27 +19,6 @@ #include "bus.h" #include "intel.h" -/* IDA min selected to avoid conflicts with HDaudio/iDISP SDI values */ -#define INTEL_DEV_NUM_IDA_MIN 4 - -#define INTEL_MASTER_SUSPEND_DELAY_MS 3000 -#define INTEL_MASTER_RESET_ITERATIONS 10 - -/* - * debug/config flags for the Intel SoundWire Master. - * - * Since we may have multiple masters active, we can have up to 8 - * flags reused in each byte, with master0 using the ls-byte, etc. 
- */ - -#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME BIT(0) -#define SDW_INTEL_MASTER_DISABLE_CLOCK_STOP BIT(1) -#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE BIT(2) -#define SDW_INTEL_MASTER_DISABLE_MULTI_LINK BIT(3) - -static int md_flags; -module_param_named(sdw_md_flags, md_flags, int, 0444); -MODULE_PARM_DESC(sdw_md_flags, "SoundWire Intel Master device flags (0x0 all off)"); enum intel_pdi_type { INTEL_PDI_IN = 0, @@ -1445,636 +1421,3 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = { }; EXPORT_SYMBOL_NS(sdw_intel_cnl_hw_ops, SOUNDWIRE_INTEL); -static int generic_pre_bank_switch(struct sdw_bus *bus) -{ - struct sdw_cdns *cdns = bus_to_cdns(bus); - struct sdw_intel *sdw = cdns_to_intel(cdns); - - return sdw->link_res->hw_ops->pre_bank_switch(sdw); -} - -static int generic_post_bank_switch(struct sdw_bus *bus) -{ - struct sdw_cdns *cdns = bus_to_cdns(bus); - struct sdw_intel *sdw = cdns_to_intel(cdns); - - return sdw->link_res->hw_ops->post_bank_switch(sdw); -} - -static int sdw_master_read_intel_prop(struct sdw_bus *bus) -{ - struct sdw_master_prop *prop = &bus->prop; - struct fwnode_handle *link; - char name[32]; - u32 quirk_mask; - - /* Find master handle */ - snprintf(name, sizeof(name), - "mipi-sdw-link-%d-subproperties", bus->link_id); - - link = device_get_named_child_node(bus->dev, name); - if (!link) { - dev_err(bus->dev, "Master node %s not found\n", name); - return -EIO; - } - - fwnode_property_read_u32(link, - "intel-sdw-ip-clock", - &prop->mclk_freq); - - /* the values reported by BIOS are the 2x clock, not the bus clock */ - prop->mclk_freq /= 2; - - fwnode_property_read_u32(link, - "intel-quirk-mask", - &quirk_mask); - - if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) - prop->hw_disabled = true; - - prop->quirks = SDW_MASTER_QUIRKS_CLEAR_INITIAL_CLASH | - SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY; - - return 0; -} - -static int intel_prop_read(struct sdw_bus *bus) -{ - /* Initialize with default handler to read all DisCo properties */ - sdw_master_read_prop(bus); - - /* read Intel-specific properties */ - sdw_master_read_intel_prop(bus); - - return 0; -} - -static struct sdw_master_ops sdw_intel_ops = { - .read_prop = intel_prop_read, - .override_adr = sdw_dmi_override_adr, - .xfer_msg = cdns_xfer_msg, - .xfer_msg_defer = cdns_xfer_msg_defer, - .reset_page_addr = cdns_reset_page_addr, - .set_bus_conf = cdns_bus_conf, - .pre_bank_switch = generic_pre_bank_switch, - .post_bank_switch = generic_post_bank_switch, - .read_ping_status = cdns_read_ping_status, -}; - -/* - * probe and init (aux_dev_id argument is required by function prototype but not used) - */ -static int intel_link_probe(struct auxiliary_device *auxdev, - const struct auxiliary_device_id *aux_dev_id) - -{ - struct device *dev = &auxdev->dev; - struct sdw_intel_link_dev *ldev = auxiliary_dev_to_sdw_intel_link_dev(auxdev); - struct sdw_intel *sdw; - struct sdw_cdns *cdns; - struct sdw_bus *bus; - int ret; - - sdw = devm_kzalloc(dev, sizeof(*sdw), GFP_KERNEL); - if (!sdw) - return -ENOMEM; - - cdns = &sdw->cdns; - bus = &cdns->bus; - - sdw->instance = auxdev->id; - sdw->link_res = &ldev->link_res; - cdns->dev = dev; - cdns->registers = sdw->link_res->registers; - cdns->instance = sdw->instance; - cdns->msg_count = 0; - - bus->link_id = auxdev->id; - bus->dev_num_ida_min = INTEL_DEV_NUM_IDA_MIN; - bus->clk_stop_timeout = 1; - - sdw_cdns_probe(cdns); - - /* Set ops */ - bus->ops = &sdw_intel_ops; - - /* set driver data, accessed by snd_soc_dai_get_drvdata() */ - auxiliary_set_drvdata(auxdev, cdns); - - /* 
use generic bandwidth allocation algorithm */ - sdw->cdns.bus.compute_params = sdw_compute_params; - - /* avoid resuming from pm_runtime suspend if it's not required */ - dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - - ret = sdw_bus_master_add(bus, dev, dev->fwnode); - if (ret) { - dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); - return ret; - } - - if (bus->prop.hw_disabled) - dev_info(dev, - "SoundWire master %d is disabled, will be ignored\n", - bus->link_id); - /* - * Ignore BIOS err_threshold, it's a really bad idea when dealing - * with multiple hardware synchronized links - */ - bus->prop.err_threshold = 0; - - return 0; -} - -int intel_link_startup(struct auxiliary_device *auxdev) -{ - struct device *dev = &auxdev->dev; - struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - int link_flags; - bool multi_link; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled) { - dev_info(dev, - "SoundWire master %d is disabled, ignoring\n", - sdw->instance); - return 0; - } - - link_flags = md_flags >> (bus->link_id * 8); - multi_link = !(link_flags & SDW_INTEL_MASTER_DISABLE_MULTI_LINK); - if (!multi_link) { - dev_dbg(dev, "Multi-link is disabled\n"); - } else { - /* - * hardware-based synchronization is required regardless - * of the number of segments used by a stream: SSP-based - * synchronization is gated by gsync when the multi-master - * mode is set. - */ - bus->hw_sync_min_links = 1; - } - bus->multi_link = multi_link; - - /* Initialize shim, controller */ - ret = sdw_intel_link_power_up(sdw); - if (ret) - goto err_init; - - /* Register DAIs */ - ret = sdw_intel_register_dai(sdw); - if (ret) { - dev_err(dev, "DAI registration failed: %d\n", ret); - goto err_power_up; - } - - sdw_intel_debugfs_init(sdw); - - /* start bus */ - ret = sdw_intel_start_bus(sdw); - if (ret) { - dev_err(dev, "bus start failed: %d\n", ret); - goto err_power_up; - } - - /* Enable runtime PM */ - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME)) { - pm_runtime_set_autosuspend_delay(dev, - INTEL_MASTER_SUSPEND_DELAY_MS); - pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); - - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - } - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - if (clock_stop_quirks & SDW_INTEL_CLK_STOP_NOT_ALLOWED) { - /* - * To keep the clock running we need to prevent - * pm_runtime suspend from happening by increasing the - * reference count. - * This quirk is specified by the parent PCI device in - * case of specific latency requirements. It will have - * no effect if pm_runtime is disabled by the user via - * a module parameter for testing purposes. - */ - pm_runtime_get_noresume(dev); - } - - /* - * The runtime PM status of Slave devices is "Unsupported" - * until they report as ATTACHED. If they don't, e.g. because - * there are no Slave devices populated or if the power-on is - * delayed or dependent on a power switch, the Master will - * remain active and prevent its parent from suspending. - * - * Conditionally force the pm_runtime core to re-evaluate the - * Master status in the absence of any Slave activity. A quirk - * is provided to e.g. deal with Slaves that may be powered on - * with a delay. A more complete solution would require the - * definition of Master properties. 
- */ - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - - sdw->startup_done = true; - return 0; - -err_power_up: - sdw_intel_link_power_down(sdw); -err_init: - return ret; -} - -static void intel_link_remove(struct auxiliary_device *auxdev) -{ - struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - - /* - * Since pm_runtime is already disabled, we don't decrease - * the refcount when the clock_stop_quirk is - * SDW_INTEL_CLK_STOP_NOT_ALLOWED - */ - if (!bus->prop.hw_disabled) { - sdw_intel_debugfs_exit(sdw); - sdw_cdns_enable_interrupt(cdns, false); - } - sdw_bus_master_delete(bus); -} - -int intel_link_process_wakeen_event(struct auxiliary_device *auxdev) -{ - struct device *dev = &auxdev->dev; - struct sdw_intel *sdw; - struct sdw_bus *bus; - - sdw = auxiliary_get_drvdata(auxdev); - bus = &sdw->cdns.bus; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - if (!sdw_intel_shim_check_wake(sdw)) - return 0; - - /* disable WAKEEN interrupt ASAP to prevent interrupt flood */ - sdw_intel_shim_wake(sdw, false); - - /* - * resume the Master, which will generate a bus reset and result in - * Slaves re-attaching and be re-enumerated. The SoundWire physical - * device which generated the wake will trigger an interrupt, which - * will in turn cause the corresponding Linux Slave device to be - * resumed and the Slave codec driver to check the status. - */ - pm_request_resume(dev); - - return 0; -} - -/* - * PM calls - */ - -static int intel_resume_child_device(struct device *dev, void *data) -{ - int ret; - struct sdw_slave *slave = dev_to_sdw_dev(dev); - - if (!slave->probed) { - dev_dbg(dev, "skipping device, no probed driver\n"); - return 0; - } - if (!slave->dev_num_sticky) { - dev_dbg(dev, "skipping device, never detected on bus\n"); - return 0; - } - - ret = pm_request_resume(dev); - if (ret < 0) - dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret); - - return ret; -} - -static int __maybe_unused intel_pm_prepare(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - - if (pm_runtime_suspended(dev) && - pm_runtime_suspended(dev->parent) && - ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || - !clock_stop_quirks)) { - /* - * if we've enabled clock stop, and the parent is suspended, the SHIM registers - * are not accessible and the shim wake cannot be disabled. - * The only solution is to resume the entire bus to full power - */ - - /* - * If any operation in this block fails, we keep going since we don't want - * to prevent system suspend from happening and errors should be recoverable - * on resume. - */ - - /* - * first resume the device for this link. This will also by construction - * resume the PCI parent device. - */ - ret = pm_request_resume(dev); - if (ret < 0) { - dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret); - return 0; - } - - /* - * Continue resuming the entire bus (parent + child devices) to exit - * the clock stop mode. 
If there are no devices connected on this link - * this is a no-op. - * The resume to full power could have been implemented with a .prepare - * step in SoundWire codec drivers. This would however require a lot - * of code to handle an Intel-specific corner case. It is simpler in - * practice to add a loop at the link level. - */ - ret = device_for_each_child(bus->dev, NULL, intel_resume_child_device); - - if (ret < 0) - dev_err(dev, "%s: intel_resume_child_device failed: %d\n", __func__, ret); - } - - return 0; -} - -static int __maybe_unused intel_suspend(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status: suspended\n"); - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - - if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || - !clock_stop_quirks) { - - if (pm_runtime_suspended(dev->parent)) { - /* - * paranoia check: this should not happen with the .prepare - * resume to full power - */ - dev_err(dev, "%s: invalid config: parent is suspended\n", __func__); - } else { - sdw_intel_shim_wake(sdw, false); - } - } - - return 0; - } - - ret = sdw_intel_stop_bus(sdw, false); - if (ret < 0) { - dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret); - return ret; - } - - return 0; -} - -static int __maybe_unused intel_suspend_runtime(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - - if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { - ret = sdw_intel_stop_bus(sdw, false); - if (ret < 0) { - dev_err(dev, "%s: cannot stop bus during teardown: %d\n", - __func__, ret); - return ret; - } - } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) { - ret = sdw_intel_stop_bus(sdw, true); - if (ret < 0) { - dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n", - __func__, ret); - return ret; - } - } else { - dev_err(dev, "%s clock_stop_quirks %x unsupported\n", - __func__, clock_stop_quirks); - ret = -EINVAL; - } - - return ret; -} - -static int __maybe_unused intel_resume(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - int link_flags; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - link_flags = md_flags >> (bus->link_id * 8); - - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); - - /* follow required sequence from runtime_pm.rst */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_enable(dev); - - link_flags = md_flags >> (bus->link_id * 8); - - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - } - - ret = sdw_intel_link_power_up(sdw); - if (ret) 
{ - dev_err(dev, "%s failed: %d\n", __func__, ret); - return ret; - } - - /* - * make sure all Slaves are tagged as UNATTACHED and provide - * reason for reinitialization - */ - sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); - - ret = sdw_intel_start_bus(sdw); - if (ret < 0) { - dev_err(dev, "cannot start bus during resume\n"); - sdw_intel_link_power_down(sdw); - return ret; - } - - /* - * after system resume, the pm_runtime suspend() may kick in - * during the enumeration, before any children device force the - * master device to remain active. Using pm_runtime_get() - * routines is not really possible, since it'd prevent the - * master from suspending. - * A reasonable compromise is to update the pm_runtime - * counters and delay the pm_runtime suspend by several - * seconds, by when all enumeration should be complete. - */ - pm_runtime_mark_last_busy(dev); - - return 0; -} - -static int __maybe_unused intel_resume_runtime(struct device *dev) -{ - struct sdw_cdns *cdns = dev_get_drvdata(dev); - struct sdw_intel *sdw = cdns_to_intel(cdns); - struct sdw_bus *bus = &cdns->bus; - u32 clock_stop_quirks; - int ret; - - if (bus->prop.hw_disabled || !sdw->startup_done) { - dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", - bus->link_id); - return 0; - } - - /* unconditionally disable WAKEEN interrupt */ - sdw_intel_shim_wake(sdw, false); - - clock_stop_quirks = sdw->link_res->clock_stop_quirks; - - if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { - ret = sdw_intel_link_power_up(sdw); - if (ret) { - dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret); - return ret; - } - - /* - * make sure all Slaves are tagged as UNATTACHED and provide - * reason for reinitialization - */ - sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); - - ret = sdw_intel_start_bus(sdw); - if (ret < 0) { - dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret); - sdw_intel_link_power_down(sdw); - return ret; - } - - - } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) { - ret = sdw_intel_link_power_up(sdw); - if (ret) { - dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret); - return ret; - } - - ret = sdw_intel_start_bus_after_reset(sdw); - if (ret < 0) { - dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret); - sdw_intel_link_power_down(sdw); - return ret; - } - } else if (!clock_stop_quirks) { - - sdw_intel_check_clock_stop(sdw); - - ret = sdw_intel_link_power_up(sdw); - if (ret) { - dev_err(dev, "%s: power_up failed: %d\n", __func__, ret); - return ret; - } - - ret = sdw_intel_start_bus_after_clock_stop(sdw); - if (ret < 0) { - dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret); - sdw_intel_link_power_down(sdw); - return ret; - } - } else { - dev_err(dev, "%s: clock_stop_quirks %x unsupported\n", - __func__, clock_stop_quirks); - ret = -EINVAL; - } - - return ret; -} - -static const struct dev_pm_ops intel_pm = { - .prepare = intel_pm_prepare, - SET_SYSTEM_SLEEP_PM_OPS(intel_suspend, intel_resume) - SET_RUNTIME_PM_OPS(intel_suspend_runtime, intel_resume_runtime, NULL) -}; - -static const struct auxiliary_device_id intel_link_id_table[] = { - { .name = "soundwire_intel.link" }, - {}, -}; -MODULE_DEVICE_TABLE(auxiliary, intel_link_id_table); - -static struct auxiliary_driver sdw_intel_drv = { - .probe = intel_link_probe, - .remove = intel_link_remove, - .driver = { - /* auxiliary_driver_register() sets .name to be the modname */ - .pm = &intel_pm, - }, - 
.id_table = intel_link_id_table -}; -module_auxiliary_driver(sdw_intel_drv); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Intel Soundwire Link Driver"); diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 9ac3397757a07..de9883313c8f2 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -50,16 +50,9 @@ struct sdw_intel { #endif }; -int intel_link_startup(struct auxiliary_device *auxdev); -int intel_link_process_wakeen_event(struct auxiliary_device *auxdev); +#define cdns_to_intel(_cdns) container_of(_cdns, struct sdw_intel, cdns) -struct sdw_intel_link_dev { - struct auxiliary_device auxdev; - struct sdw_intel_link_res link_res; -}; - -#define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \ - container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev) +#define INTEL_MASTER_RESET_ITERATIONS 10 #define SDW_INTEL_CHECK_OPS(sdw, cb) ((sdw) && (sdw)->link_res && (sdw)->link_res->hw_ops && \ (sdw)->link_res->hw_ops->cb) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c new file mode 100644 index 0000000000000..96c6b2112feb6 --- /dev/null +++ b/drivers/soundwire/intel_auxdevice.c @@ -0,0 +1,678 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2015-22 Intel Corporation. + +/* + * Soundwire Intel Manager Driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cadence_master.h" +#include "bus.h" +#include "intel.h" +#include "intel_auxdevice.h" + +/* IDA min selected to avoid conflicts with HDaudio/iDISP SDI values */ +#define INTEL_DEV_NUM_IDA_MIN 4 + +#define INTEL_MASTER_SUSPEND_DELAY_MS 3000 + +/* + * debug/config flags for the Intel SoundWire Master. + * + * Since we may have multiple masters active, we can have up to 8 + * flags reused in each byte, with master0 using the ls-byte, etc. 
+ */ + +#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME BIT(0) +#define SDW_INTEL_MASTER_DISABLE_CLOCK_STOP BIT(1) +#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE BIT(2) +#define SDW_INTEL_MASTER_DISABLE_MULTI_LINK BIT(3) + +static int md_flags; +module_param_named(sdw_md_flags, md_flags, int, 0444); +MODULE_PARM_DESC(sdw_md_flags, "SoundWire Intel Master device flags (0x0 all off)"); + +static int generic_pre_bank_switch(struct sdw_bus *bus) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + return sdw->link_res->hw_ops->pre_bank_switch(sdw); +} + +static int generic_post_bank_switch(struct sdw_bus *bus) +{ + struct sdw_cdns *cdns = bus_to_cdns(bus); + struct sdw_intel *sdw = cdns_to_intel(cdns); + + return sdw->link_res->hw_ops->post_bank_switch(sdw); +} + +static int sdw_master_read_intel_prop(struct sdw_bus *bus) +{ + struct sdw_master_prop *prop = &bus->prop; + struct fwnode_handle *link; + char name[32]; + u32 quirk_mask; + + /* Find master handle */ + snprintf(name, sizeof(name), + "mipi-sdw-link-%d-subproperties", bus->link_id); + + link = device_get_named_child_node(bus->dev, name); + if (!link) { + dev_err(bus->dev, "Master node %s not found\n", name); + return -EIO; + } + + fwnode_property_read_u32(link, + "intel-sdw-ip-clock", + &prop->mclk_freq); + + /* the values reported by BIOS are the 2x clock, not the bus clock */ + prop->mclk_freq /= 2; + + fwnode_property_read_u32(link, + "intel-quirk-mask", + &quirk_mask); + + if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) + prop->hw_disabled = true; + + prop->quirks = SDW_MASTER_QUIRKS_CLEAR_INITIAL_CLASH | + SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY; + + return 0; +} + +static int intel_prop_read(struct sdw_bus *bus) +{ + /* Initialize with default handler to read all DisCo properties */ + sdw_master_read_prop(bus); + + /* read Intel-specific properties */ + sdw_master_read_intel_prop(bus); + + return 0; +} + +static struct sdw_master_ops sdw_intel_ops = { + .read_prop = intel_prop_read, + .override_adr = sdw_dmi_override_adr, + .xfer_msg = cdns_xfer_msg, + .xfer_msg_defer = cdns_xfer_msg_defer, + .reset_page_addr = cdns_reset_page_addr, + .set_bus_conf = cdns_bus_conf, + .pre_bank_switch = generic_pre_bank_switch, + .post_bank_switch = generic_post_bank_switch, + .read_ping_status = cdns_read_ping_status, +}; + +/* + * probe and init (aux_dev_id argument is required by function prototype but not used) + */ +static int intel_link_probe(struct auxiliary_device *auxdev, + const struct auxiliary_device_id *aux_dev_id) + +{ + struct device *dev = &auxdev->dev; + struct sdw_intel_link_dev *ldev = auxiliary_dev_to_sdw_intel_link_dev(auxdev); + struct sdw_intel *sdw; + struct sdw_cdns *cdns; + struct sdw_bus *bus; + int ret; + + sdw = devm_kzalloc(dev, sizeof(*sdw), GFP_KERNEL); + if (!sdw) + return -ENOMEM; + + cdns = &sdw->cdns; + bus = &cdns->bus; + + sdw->instance = auxdev->id; + sdw->link_res = &ldev->link_res; + cdns->dev = dev; + cdns->registers = sdw->link_res->registers; + cdns->instance = sdw->instance; + cdns->msg_count = 0; + + bus->link_id = auxdev->id; + bus->dev_num_ida_min = INTEL_DEV_NUM_IDA_MIN; + bus->clk_stop_timeout = 1; + + sdw_cdns_probe(cdns); + + /* Set ops */ + bus->ops = &sdw_intel_ops; + + /* set driver data, accessed by snd_soc_dai_get_drvdata() */ + auxiliary_set_drvdata(auxdev, cdns); + + /* use generic bandwidth allocation algorithm */ + sdw->cdns.bus.compute_params = sdw_compute_params; + + /* avoid resuming from pm_runtime suspend if it's not required */ + 
dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); + + ret = sdw_bus_master_add(bus, dev, dev->fwnode); + if (ret) { + dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); + return ret; + } + + if (bus->prop.hw_disabled) + dev_info(dev, + "SoundWire master %d is disabled, will be ignored\n", + bus->link_id); + /* + * Ignore BIOS err_threshold, it's a really bad idea when dealing + * with multiple hardware synchronized links + */ + bus->prop.err_threshold = 0; + + return 0; +} + +int intel_link_startup(struct auxiliary_device *auxdev) +{ + struct device *dev = &auxdev->dev; + struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + int link_flags; + bool multi_link; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled) { + dev_info(dev, + "SoundWire master %d is disabled, ignoring\n", + sdw->instance); + return 0; + } + + link_flags = md_flags >> (bus->link_id * 8); + multi_link = !(link_flags & SDW_INTEL_MASTER_DISABLE_MULTI_LINK); + if (!multi_link) { + dev_dbg(dev, "Multi-link is disabled\n"); + } else { + /* + * hardware-based synchronization is required regardless + * of the number of segments used by a stream: SSP-based + * synchronization is gated by gsync when the multi-master + * mode is set. + */ + bus->hw_sync_min_links = 1; + } + bus->multi_link = multi_link; + + /* Initialize shim, controller */ + ret = sdw_intel_link_power_up(sdw); + if (ret) + goto err_init; + + /* Register DAIs */ + ret = sdw_intel_register_dai(sdw); + if (ret) { + dev_err(dev, "DAI registration failed: %d\n", ret); + goto err_power_up; + } + + sdw_intel_debugfs_init(sdw); + + /* start bus */ + ret = sdw_intel_start_bus(sdw); + if (ret) { + dev_err(dev, "bus start failed: %d\n", ret); + goto err_power_up; + } + + /* Enable runtime PM */ + if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME)) { + pm_runtime_set_autosuspend_delay(dev, + INTEL_MASTER_SUSPEND_DELAY_MS); + pm_runtime_use_autosuspend(dev); + pm_runtime_mark_last_busy(dev); + + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + } + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + if (clock_stop_quirks & SDW_INTEL_CLK_STOP_NOT_ALLOWED) { + /* + * To keep the clock running we need to prevent + * pm_runtime suspend from happening by increasing the + * reference count. + * This quirk is specified by the parent PCI device in + * case of specific latency requirements. It will have + * no effect if pm_runtime is disabled by the user via + * a module parameter for testing purposes. + */ + pm_runtime_get_noresume(dev); + } + + /* + * The runtime PM status of Slave devices is "Unsupported" + * until they report as ATTACHED. If they don't, e.g. because + * there are no Slave devices populated or if the power-on is + * delayed or dependent on a power switch, the Master will + * remain active and prevent its parent from suspending. + * + * Conditionally force the pm_runtime core to re-evaluate the + * Master status in the absence of any Slave activity. A quirk + * is provided to e.g. deal with Slaves that may be powered on + * with a delay. A more complete solution would require the + * definition of Master properties. 
+ */ + if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) + pm_runtime_idle(dev); + + sdw->startup_done = true; + return 0; + +err_power_up: + sdw_intel_link_power_down(sdw); +err_init: + return ret; +} + +static void intel_link_remove(struct auxiliary_device *auxdev) +{ + struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + + /* + * Since pm_runtime is already disabled, we don't decrease + * the refcount when the clock_stop_quirk is + * SDW_INTEL_CLK_STOP_NOT_ALLOWED + */ + if (!bus->prop.hw_disabled) { + sdw_intel_debugfs_exit(sdw); + sdw_cdns_enable_interrupt(cdns, false); + } + sdw_bus_master_delete(bus); +} + +int intel_link_process_wakeen_event(struct auxiliary_device *auxdev) +{ + struct device *dev = &auxdev->dev; + struct sdw_intel *sdw; + struct sdw_bus *bus; + + sdw = auxiliary_get_drvdata(auxdev); + bus = &sdw->cdns.bus; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + if (!sdw_intel_shim_check_wake(sdw)) + return 0; + + /* disable WAKEEN interrupt ASAP to prevent interrupt flood */ + sdw_intel_shim_wake(sdw, false); + + /* + * resume the Master, which will generate a bus reset and result in + * Slaves re-attaching and be re-enumerated. The SoundWire physical + * device which generated the wake will trigger an interrupt, which + * will in turn cause the corresponding Linux Slave device to be + * resumed and the Slave codec driver to check the status. + */ + pm_request_resume(dev); + + return 0; +} + +/* + * PM calls + */ + +static int intel_resume_child_device(struct device *dev, void *data) +{ + int ret; + struct sdw_slave *slave = dev_to_sdw_dev(dev); + + if (!slave->probed) { + dev_dbg(dev, "skipping device, no probed driver\n"); + return 0; + } + if (!slave->dev_num_sticky) { + dev_dbg(dev, "skipping device, never detected on bus\n"); + return 0; + } + + ret = pm_request_resume(dev); + if (ret < 0) + dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret); + + return ret; +} + +static int __maybe_unused intel_pm_prepare(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + + if (pm_runtime_suspended(dev) && + pm_runtime_suspended(dev->parent) && + ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || + !clock_stop_quirks)) { + /* + * if we've enabled clock stop, and the parent is suspended, the SHIM registers + * are not accessible and the shim wake cannot be disabled. + * The only solution is to resume the entire bus to full power + */ + + /* + * If any operation in this block fails, we keep going since we don't want + * to prevent system suspend from happening and errors should be recoverable + * on resume. + */ + + /* + * first resume the device for this link. This will also by construction + * resume the PCI parent device. + */ + ret = pm_request_resume(dev); + if (ret < 0) { + dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret); + return 0; + } + + /* + * Continue resuming the entire bus (parent + child devices) to exit + * the clock stop mode. 
If there are no devices connected on this link + * this is a no-op. + * The resume to full power could have been implemented with a .prepare + * step in SoundWire codec drivers. This would however require a lot + * of code to handle an Intel-specific corner case. It is simpler in + * practice to add a loop at the link level. + */ + ret = device_for_each_child(bus->dev, NULL, intel_resume_child_device); + + if (ret < 0) + dev_err(dev, "%s: intel_resume_child_device failed: %d\n", __func__, ret); + } + + return 0; +} + +static int __maybe_unused intel_suspend(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + if (pm_runtime_suspended(dev)) { + dev_dbg(dev, "pm_runtime status: suspended\n"); + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + + if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || + !clock_stop_quirks) { + + if (pm_runtime_suspended(dev->parent)) { + /* + * paranoia check: this should not happen with the .prepare + * resume to full power + */ + dev_err(dev, "%s: invalid config: parent is suspended\n", __func__); + } else { + sdw_intel_shim_wake(sdw, false); + } + } + + return 0; + } + + ret = sdw_intel_stop_bus(sdw, false); + if (ret < 0) { + dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret); + return ret; + } + + return 0; +} + +static int __maybe_unused intel_suspend_runtime(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + + if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { + ret = sdw_intel_stop_bus(sdw, false); + if (ret < 0) { + dev_err(dev, "%s: cannot stop bus during teardown: %d\n", + __func__, ret); + return ret; + } + } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) { + ret = sdw_intel_stop_bus(sdw, true); + if (ret < 0) { + dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n", + __func__, ret); + return ret; + } + } else { + dev_err(dev, "%s clock_stop_quirks %x unsupported\n", + __func__, clock_stop_quirks); + ret = -EINVAL; + } + + return ret; +} + +static int __maybe_unused intel_resume(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + int link_flags; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + link_flags = md_flags >> (bus->link_id * 8); + + if (pm_runtime_suspended(dev)) { + dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); + + /* follow required sequence from runtime_pm.rst */ + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_mark_last_busy(dev); + pm_runtime_enable(dev); + + link_flags = md_flags >> (bus->link_id * 8); + + if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) + pm_runtime_idle(dev); + } + + ret = sdw_intel_link_power_up(sdw); + if (ret) 
{ + dev_err(dev, "%s failed: %d\n", __func__, ret); + return ret; + } + + /* + * make sure all Slaves are tagged as UNATTACHED and provide + * reason for reinitialization + */ + sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); + + ret = sdw_intel_start_bus(sdw); + if (ret < 0) { + dev_err(dev, "cannot start bus during resume\n"); + sdw_intel_link_power_down(sdw); + return ret; + } + + /* + * after system resume, the pm_runtime suspend() may kick in + * during the enumeration, before any children device force the + * master device to remain active. Using pm_runtime_get() + * routines is not really possible, since it'd prevent the + * master from suspending. + * A reasonable compromise is to update the pm_runtime + * counters and delay the pm_runtime suspend by several + * seconds, by when all enumeration should be complete. + */ + pm_runtime_mark_last_busy(dev); + + return 0; +} + +static int __maybe_unused intel_resume_runtime(struct device *dev) +{ + struct sdw_cdns *cdns = dev_get_drvdata(dev); + struct sdw_intel *sdw = cdns_to_intel(cdns); + struct sdw_bus *bus = &cdns->bus; + u32 clock_stop_quirks; + int ret; + + if (bus->prop.hw_disabled || !sdw->startup_done) { + dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n", + bus->link_id); + return 0; + } + + /* unconditionally disable WAKEEN interrupt */ + sdw_intel_shim_wake(sdw, false); + + clock_stop_quirks = sdw->link_res->clock_stop_quirks; + + if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) { + ret = sdw_intel_link_power_up(sdw); + if (ret) { + dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret); + return ret; + } + + /* + * make sure all Slaves are tagged as UNATTACHED and provide + * reason for reinitialization + */ + sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET); + + ret = sdw_intel_start_bus(sdw); + if (ret < 0) { + dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret); + sdw_intel_link_power_down(sdw); + return ret; + } + + } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) { + ret = sdw_intel_link_power_up(sdw); + if (ret) { + dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret); + return ret; + } + + ret = sdw_intel_start_bus_after_reset(sdw); + if (ret < 0) { + dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret); + sdw_intel_link_power_down(sdw); + return ret; + } + } else if (!clock_stop_quirks) { + + sdw_intel_check_clock_stop(sdw); + + ret = sdw_intel_link_power_up(sdw); + if (ret) { + dev_err(dev, "%s: power_up failed: %d\n", __func__, ret); + return ret; + } + + ret = sdw_intel_start_bus_after_clock_stop(sdw); + if (ret < 0) { + dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret); + sdw_intel_link_power_down(sdw); + return ret; + } + } else { + dev_err(dev, "%s: clock_stop_quirks %x unsupported\n", + __func__, clock_stop_quirks); + ret = -EINVAL; + } + + return ret; +} + +static const struct dev_pm_ops intel_pm = { + .prepare = intel_pm_prepare, + SET_SYSTEM_SLEEP_PM_OPS(intel_suspend, intel_resume) + SET_RUNTIME_PM_OPS(intel_suspend_runtime, intel_resume_runtime, NULL) +}; + +static const struct auxiliary_device_id intel_link_id_table[] = { + { .name = "soundwire_intel.link" }, + {}, +}; +MODULE_DEVICE_TABLE(auxiliary, intel_link_id_table); + +static struct auxiliary_driver sdw_intel_drv = { + .probe = intel_link_probe, + .remove = intel_link_remove, + .driver = { + /* auxiliary_driver_register() sets .name to be the modname */ + .pm = &intel_pm, + }, + 
.id_table = intel_link_id_table +}; +module_auxiliary_driver(sdw_intel_drv); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Intel Soundwire Link Driver"); diff --git a/drivers/soundwire/intel_auxdevice.h b/drivers/soundwire/intel_auxdevice.h new file mode 100644 index 0000000000000..a00ecde955633 --- /dev/null +++ b/drivers/soundwire/intel_auxdevice.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* Copyright(c) 2015-2022 Intel Corporation. */ + +#ifndef __SDW_INTEL_AUXDEVICE_H +#define __SDW_INTEL_AUXDEVICE_H + +int intel_link_startup(struct auxiliary_device *auxdev); +int intel_link_process_wakeen_event(struct auxiliary_device *auxdev); + +struct sdw_intel_link_dev { + struct auxiliary_device auxdev; + struct sdw_intel_link_res link_res; +}; + +#define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \ + container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev) + +#endif /* __SDW_INTEL_AUXDEVICE_H */ diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c index 1e6d74b3e7732..b3a8db7c3b776 100644 --- a/drivers/soundwire/intel_init.c +++ b/drivers/soundwire/intel_init.c @@ -17,6 +17,7 @@ #include #include "cadence_master.h" #include "intel.h" +#include "intel_auxdevice.h" static void intel_link_dev_release(struct device *dev) { -- GitLab From 8818039f959b2efc0d6f2cb101f8061332f0c77e Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 21 Nov 2022 09:48:45 -0500 Subject: [PATCH 317/694] kbuild: add ability to make source rpm buildable using koji MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes: - added new target 'srcrpm-pkg' to generate source rpm - added required build tools to spec file - removed locally compiled host tools to force their re-compile Signed-off-by: Ivan Vecera Signed-off-by: Jonathan Toppins Acked-by: Íñigo Huguet Tested-by: Ivan Vecera Signed-off-by: Masahiro Yamada --- scripts/Makefile.package | 10 ++++++++++ scripts/package/mkspec | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 8bbcced67c221..1290f1c631fb6 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -62,6 +62,16 @@ rpm-pkg: +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -ta $(KERNELPATH).tar.gz \ --define='_smp_mflags %{nil}' +# srcrpm-pkg +# --------------------------------------------------------------------------- +PHONY += srcrpm-pkg +srcrpm-pkg: + $(MAKE) clean + $(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec + $(call cmd,src_tar,$(KERNELPATH),kernel.spec) + +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -ts $(KERNELPATH).tar.gz \ + --define='_smp_mflags %{nil}' --define='_srcrpmdir $(srctree)' + # binrpm-pkg # --------------------------------------------------------------------------- PHONY += binrpm-pkg diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 70392fd2fd29c..dda00a948a01d 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -33,6 +33,8 @@ EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \ --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \ --exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s" +test -n "$LOCALVERSION" && MAKE="$MAKE LOCALVERSION=$LOCALVERSION" + # We can label the here-doc lines for conditional output to the spec file # # Labels: @@ -49,6 +51,9 @@ sed -e '/^DEL/d' -e 's/^\t*//' < Date: Fri, 17 Jun 2022 23:50:19 +0800 Subject: [PATCH 318/694] KVM: PPC: Use __func__ to get function's name 
Prefer using '"%s...", __func__' to get current function's name in output messages. Signed-off-by: XueBing Chen Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/13b2c857.beb.181725bad35.Coremail.chenxuebing@jari.cn --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index e9744b41a226c..351ff0f89b007 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1202,7 +1202,7 @@ static int resize_hpt_allocate(struct kvm_resize_hpt *resize) if (rc < 0) return rc; - resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n", + resize_hpt_debug(resize, "%s(): HPT @ 0x%lx\n", __func__, resize->hpt.virt); return 0; @@ -1443,7 +1443,7 @@ static void resize_hpt_prepare_work(struct work_struct *work) */ mutex_unlock(&kvm->arch.mmu_setup_lock); - resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", + resize_hpt_debug(resize, "%s(): order = %d\n", __func__, resize->order); err = resize_hpt_allocate(resize); @@ -1887,8 +1887,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, tmp); if (ret != H_SUCCESS) { - pr_err("kvm_htab_write ret %ld i=%ld v=%lx " - "r=%lx\n", ret, i, v, r); + pr_err("%s ret %ld i=%ld v=%lx r=%lx\n", __func__, ret, i, v, r); goto out; } if (!mmu_ready && is_vrma_hpte(v)) { -- GitLab From 392a58f1eaab0c90b80d7ba4a03dbf6eaaeabe60 Mon Sep 17 00:00:00 2001 From: Zhang Jiaming Date: Thu, 23 Jun 2022 18:20:31 +0800 Subject: [PATCH 319/694] KVM: PPC: Book3S HV: XIVE: Fix spelling mistakes Change 'subsquent' to 'subsequent'. Change 'accross' to 'across'. Signed-off-by: Zhang Jiaming Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220623102031.15359-1-jiaming@nfschina.com --- arch/powerpc/kvm/book3s_xive.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 4ca23644f7525..b4b680f2d8534 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -539,7 +539,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) if (irq == XICS_IPI || irq == 0) { /* * This barrier orders the setting of xc->cppr vs. - * subsquent test of xc->mfrr done inside + * subsequent test of xc->mfrr done inside * scan_interrupts and push_pending_to_hw */ smp_mb(); @@ -563,7 +563,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) /* * This barrier orders both setting of in_eoi above vs, * subsequent test of guest_priority, and the setting - * of xc->cppr vs. subsquent test of xc->mfrr done inside + * of xc->cppr vs. subsequent test of xc->mfrr done inside * scan_interrupts and push_pending_to_hw */ smp_mb(); @@ -2392,7 +2392,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Now, we select a target if we have one. If we don't we * leave the interrupt untargetted. It means that an interrupt - * can become "untargetted" accross migration if it was masked + * can become "untargetted" across migration if it was masked * by set_xive() but there is little we can do about it. 
*/ -- GitLab From 6fa1efeaa6671fb7339a6c62ceeec19e8e787963 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Sun, 3 Jul 2022 13:29:32 -0400 Subject: [PATCH 320/694] KVM: PPC: Book3s: Use arg->size directly in kvm_vm_ioctl_create_spapr_tce() The size variable is just a copy of args->size, neither size nor args are modified, so just use args->size directly. Signed-off-by: Deming Wang [mpe: Reword change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220703172932.11329-1-wangdeming@inspur.com --- arch/powerpc/kvm/book3s_64_vio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 40864373ef876..95e738ef9062b 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -294,14 +294,14 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt = NULL; struct kvmppc_spapr_tce_table *siter; struct mm_struct *mm = kvm->mm; - unsigned long npages, size = args->size; + unsigned long npages; int ret; if (!args->size || args->page_shift < 12 || args->page_shift > 34 || (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) return -EINVAL; - npages = kvmppc_tce_pages(size); + npages = kvmppc_tce_pages(args->size); ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true); if (ret) return ret; @@ -314,7 +314,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, stt->liobn = args->liobn; stt->page_shift = args->page_shift; stt->offset = args->offset; - stt->size = size; + stt->size = args->size; stt->kvm = kvm; mutex_init(&stt->alloc_lock); INIT_LIST_HEAD_RCU(&stt->iommu_tables); -- GitLab From 8daa9c1dc9b4a3422801017ca46d935073dc14c0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 9 Jul 2022 17:56:43 +0200 Subject: [PATCH 321/694] KVM: PPC: Book3S HV: Use the bitmap API to allocate bitmaps Use bitmap_zalloc()/bitmap_free() instead of hand-writing them. It is less verbose and it improves the semantics.
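For reference, a minimal before/after sketch of the allocation pattern (not taken from the patch; the example() helper and the nbits count are arbitrary, and the fragment assumes <linux/bitmap.h> and <linux/slab.h>):

	static int example(unsigned long nbits)
	{
		unsigned long *mask;

		/* open-coded style removed by the patch; pairs with kfree() */
		mask = kcalloc(BITS_TO_LONGS(nbits), sizeof(unsigned long), GFP_KERNEL);
		if (!mask)
			return -ENOMEM;
		kfree(mask);

		/* bitmap API style; pairs with the matching bitmap_free() */
		mask = bitmap_zalloc(nbits, GFP_KERNEL);
		if (!mask)
			return -ENOMEM;
		bitmap_free(mask);

		return 0;
	}

Both forms allocate enough longs to hold nbits bits and zero them; the bitmap helpers simply make that intent explicit at the call site.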
Signed-off-by: Christophe JAILLET Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/52e843a460bc374973149b8da0bd04f9761b80b7.1657382184.git.christophe.jaillet@wanadoo.fr --- arch/powerpc/kvm/book3s_hv_uvmem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index e2f11f9c3f2aa..1d67baa5557a2 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -1190,8 +1190,7 @@ int kvmppc_uvmem_init(void) pfn_first = res->start >> PAGE_SHIFT; pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT); - kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first), - sizeof(unsigned long), GFP_KERNEL); + kvmppc_uvmem_bitmap = bitmap_zalloc(pfn_last - pfn_first, GFP_KERNEL); if (!kvmppc_uvmem_bitmap) { ret = -ENOMEM; goto out_unmap; @@ -1215,5 +1214,5 @@ void kvmppc_uvmem_free(void) memunmap_pages(&kvmppc_uvmem_pgmap); release_mem_region(kvmppc_uvmem_pgmap.range.start, range_len(&kvmppc_uvmem_pgmap.range)); - kfree(kvmppc_uvmem_bitmap); + bitmap_free(kvmppc_uvmem_bitmap); } -- GitLab From 8daa9c1dc9b4a3422801017ca46d935073dc14c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:23 +0100 Subject: [PATCH 322/694] macintosh/ams-i2c: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-290-uwe@kleine-koenig.org --- drivers/macintosh/ams/ams-i2c.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/macintosh/ams/ams-i2c.c b/drivers/macintosh/ams/ams-i2c.c index 3ded340699fb5..a4a1035eb4123 100644 --- a/drivers/macintosh/ams/ams-i2c.c +++ b/drivers/macintosh/ams/ams-i2c.c @@ -56,8 +56,7 @@ enum ams_i2c_cmd { AMS_CMD_START, }; -static int ams_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id); +static int ams_i2c_probe(struct i2c_client *client); static void ams_i2c_remove(struct i2c_client *client); static const struct i2c_device_id ams_id[] = { @@ -70,7 +69,7 @@ static struct i2c_driver ams_i2c_driver = { .driver = { .name = "ams", }, - .probe = ams_i2c_probe, + .probe_new = ams_i2c_probe, .remove = ams_i2c_remove, .id_table = ams_id, }; @@ -155,8 +154,7 @@ static void ams_i2c_get_xyz(s8 *x, s8 *y, s8 *z) *z = ams_i2c_read(AMS_DATAZ); } -static int ams_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ams_i2c_probe(struct i2c_client *client) { int vmaj, vmin; int result; -- GitLab From 0424113fed923a2fcb699b5f3aa335d16e092f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:24 +0100 Subject: [PATCH 323/694] macintosh/therm_adt746x: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
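Illustration (not part of this patch): a condensed sketch of the .probe_new() conversion pattern used by this and the following patches, for a made-up "foo" driver. i2c_client_get_device_id() and the .probe_new member are the i2c core interfaces being adopted; only drivers that still need the match entry have to look it up.

#include <linux/i2c.h>

static int foo_probe(struct i2c_client *client)
{
	/* .probe_new() has no id argument; fetch it only when still needed. */
	const struct i2c_device_id *id = i2c_client_get_device_id(client);

	if (id)
		dev_dbg(&client->dev, "matched %s, driver_data=%lu\n",
			id->name, id->driver_data);
	return 0;
}

static const struct i2c_device_id foo_id[] = {
	{ "foo", 0 },
	{ }
};

static struct i2c_driver foo_driver = {
	.driver		= { .name = "foo" },
	.probe_new	= foo_probe,	/* was: .probe = foo_probe, taking (client, id) */
	.id_table	= foo_id,
};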
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-291-uwe@kleine-koenig.org --- drivers/macintosh/therm_adt746x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index b004ea2a11025..8f5db9093c9af 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -464,9 +464,9 @@ static void thermostat_remove_files(struct thermostat *th) } -static int probe_thermostat(struct i2c_client *client, - const struct i2c_device_id *id) +static int probe_thermostat(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct device_node *np = client->dev.of_node; struct thermostat* th; const __be32 *prop; @@ -598,7 +598,7 @@ static struct i2c_driver thermostat_driver = { .driver = { .name = "therm_adt746x", }, - .probe = probe_thermostat, + .probe_new = probe_thermostat, .remove = remove_thermostat, .id_table = therm_adt746x_id, }; -- GitLab From dc9be0735c3e245fe60775307cf7842b1f9b45a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:25 +0100 Subject: [PATCH 324/694] macintosh/therm_windtunnel: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-292-uwe@kleine-koenig.org --- drivers/macintosh/therm_windtunnel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index b8228ca404544..22b15efcc0258 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -411,8 +411,9 @@ static const struct i2c_device_id therm_windtunnel_id[] = { MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id); static int -do_probe(struct i2c_client *cl, const struct i2c_device_id *id) +do_probe(struct i2c_client *cl) { + const struct i2c_device_id *id = i2c_client_get_device_id(cl); struct i2c_adapter *adapter = cl->adapter; int ret = 0; @@ -441,7 +442,7 @@ static struct i2c_driver g4fan_driver = { .driver = { .name = "therm_windtunnel", }, - .probe = do_probe, + .probe_new = do_probe, .remove = do_remove, .id_table = therm_windtunnel_id, }; -- GitLab From 9d533bdf4a582f037327f1a38ed8cf689d67cab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:26 +0100 Subject: [PATCH 325/694] macintosh/windfarm_ad7417_sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-293-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_ad7417_sensor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_ad7417_sensor.c b/drivers/macintosh/windfarm_ad7417_sensor.c index c5c54a4ce91f2..33b4723d235e4 100644 --- a/drivers/macintosh/windfarm_ad7417_sensor.c +++ b/drivers/macintosh/windfarm_ad7417_sensor.c @@ -229,8 +229,7 @@ static void wf_ad7417_init_chip(struct wf_ad7417_priv *pv) pv->config = config; } -static int wf_ad7417_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_ad7417_probe(struct i2c_client *client) { struct wf_ad7417_priv *pv; const struct mpu_data *mpu; @@ -321,7 +320,7 @@ static struct i2c_driver wf_ad7417_driver = { .name = "wf_ad7417", .of_match_table = wf_ad7417_of_id, }, - .probe = wf_ad7417_probe, + .probe_new = wf_ad7417_probe, .remove = wf_ad7417_remove, .id_table = wf_ad7417_id, }; -- GitLab From 472e4c61d2bb4977ade8e2491953954bf9723563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:27 +0100 Subject: [PATCH 326/694] macintosh/windfarm_fcu_controls: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-294-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_fcu_controls.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c index c5b1ca5bcd732..e027d889d7e80 100644 --- a/drivers/macintosh/windfarm_fcu_controls.c +++ b/drivers/macintosh/windfarm_fcu_controls.c @@ -514,8 +514,7 @@ static int wf_fcu_init_chip(struct wf_fcu_priv *pv) return 0; } -static int wf_fcu_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_fcu_probe(struct i2c_client *client) { struct wf_fcu_priv *pv; @@ -590,7 +589,7 @@ static struct i2c_driver wf_fcu_driver = { .name = "wf_fcu", .of_match_table = wf_fcu_of_id, }, - .probe = wf_fcu_probe, + .probe_new = wf_fcu_probe, .remove = wf_fcu_remove, .id_table = wf_fcu_id, }; -- GitLab From 51a9e1755cdd8b127191030d15b74b97f7d3ce75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:28 +0100 Subject: [PATCH 327/694] macintosh/windfarm_lm75_sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .probe_new() doesn't get the i2c_device_id * parameter, so determine that explicitly in the probe function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-295-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_lm75_sensor.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index 204661c8e918f..24f0a444d3122 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -87,9 +87,9 @@ static const struct wf_sensor_ops wf_lm75_ops = { .owner = THIS_MODULE, }; -static int wf_lm75_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ +static int wf_lm75_probe(struct i2c_client *client) +{ + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct wf_lm75_sensor *lm; int rc, ds1775; const char *name, *loc; @@ -177,7 +177,7 @@ static struct i2c_driver wf_lm75_driver = { .name = "wf_lm75", .of_match_table = wf_lm75_of_id, }, - .probe = wf_lm75_probe, + .probe_new = wf_lm75_probe, .remove = wf_lm75_remove, .id_table = wf_lm75_id, }; -- GitLab From 0e2211b3373ea718d2161bcc360cd4d9a3bcebc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:29 +0100 Subject: [PATCH 328/694] macintosh/windfarm_lm87_sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-296-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_lm87_sensor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c index 40d25463346ed..f37a32c2070ca 100644 --- a/drivers/macintosh/windfarm_lm87_sensor.c +++ b/drivers/macintosh/windfarm_lm87_sensor.c @@ -95,8 +95,7 @@ static const struct wf_sensor_ops wf_lm87_ops = { .owner = THIS_MODULE, }; -static int wf_lm87_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_lm87_probe(struct i2c_client *client) { struct wf_lm87_sensor *lm; const char *name = NULL, *loc; @@ -173,7 +172,7 @@ static struct i2c_driver wf_lm87_driver = { .name = "wf_lm87", .of_match_table = wf_lm87_of_id, }, - .probe = wf_lm87_probe, + .probe_new = wf_lm87_probe, .remove = wf_lm87_remove, .id_table = wf_lm87_id, }; -- GitLab From 2d7a9d780444c8f31ee6af522a92a99492d9eeb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:30 +0100 Subject: [PATCH 329/694] macintosh/windfarm_max6690_sensor: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-297-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_max6690_sensor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c index c0d404ebc7922..6c5ab657b6b39 100644 --- a/drivers/macintosh/windfarm_max6690_sensor.c +++ b/drivers/macintosh/windfarm_max6690_sensor.c @@ -60,8 +60,7 @@ static const struct wf_sensor_ops wf_max6690_ops = { .owner = THIS_MODULE, }; -static int wf_max6690_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_max6690_probe(struct i2c_client *client) { const char *name, *loc; struct wf_6690_sensor *max; @@ -129,7 +128,7 @@ static struct i2c_driver wf_max6690_driver = { .name = "wf_max6690", .of_match_table = wf_max6690_of_id, }, - .probe = wf_max6690_probe, + .probe_new = wf_max6690_probe, .remove = wf_max6690_remove, .id_table = wf_max6690_id, }; -- GitLab From d05921a09a5a72805a1d669dce0fcbd66df86237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 18 Nov 2022 23:40:31 +0100 Subject: [PATCH 330/694] macintosh/windfarm_smu_sat: Convert to i2c's .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The probe function doesn't make use of the i2c_device_id * parameter so it can be trivially converted. Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118224540.619276-298-uwe@kleine-koenig.org --- drivers/macintosh/windfarm_smu_sat.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index be5d4593db93e..ebc4256a9e4a0 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c @@ -189,8 +189,7 @@ static const struct wf_sensor_ops wf_sat_ops = { .owner = THIS_MODULE, }; -static int wf_sat_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int wf_sat_probe(struct i2c_client *client) { struct device_node *dev = client->dev.of_node; struct wf_sat *sat; @@ -349,7 +348,7 @@ static struct i2c_driver wf_sat_driver = { .name = "wf_smu_sat", .of_match_table = wf_sat_of_id, }, - .probe = wf_sat_probe, + .probe_new = wf_sat_probe, .remove = wf_sat_remove, .id_table = wf_sat_id, }; -- GitLab From e0acfdd13474815696595206e11169736b4bca9d Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 10:32:39 +0800 Subject: [PATCH 331/694] macintosh/windfarm_pm81: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. 
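Illustration (not part of this patch): the next few patches make the same cleanup, so here is a generic before/after of the pointer test, with made-up names.

#include <linux/printk.h>

static void check_partition(const void *hdr)
{
	/* Style being removed: comparing a pointer against the integer 0,
	 *	if (hdr == 0) ...
	 *	if (hdr != 0) ...
	 * Preferred spelling: test the pointer directly.
	 */
	if (!hdr)
		pr_warn("partition not found\n");
	else
		pr_debug("partition found\n");
}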
Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647484359-12402-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/windfarm_pm81.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c index e0f4743f21cc6..257fb2c695c53 100644 --- a/drivers/macintosh/windfarm_pm81.c +++ b/drivers/macintosh/windfarm_pm81.c @@ -401,7 +401,7 @@ static void wf_smu_create_cpu_fans(void) /* First, locate the PID params in SMU SBD */ hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); - if (hdr == 0) { + if (!hdr) { printk(KERN_WARNING "windfarm: CPU PID fan config not found " "max fan speed\n"); goto fail; @@ -705,7 +705,7 @@ static int wf_init_pm(void) const struct smu_sdbp_header *hdr; hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); - if (hdr != 0) { + if (hdr) { struct smu_sdbp_sensortree *st = (struct smu_sdbp_sensortree *)&hdr[1]; wf_smu_mach_model = st->model_id; -- GitLab From 2f59562c140d3119328f869126e8e593a99a392f Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 10:35:54 +0800 Subject: [PATCH 332/694] macintosh/adb: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647484554-13258-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/adb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 1bbb9ca08d40f..23bd0c77ac1af 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -478,7 +478,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids, if ((adb_handler[i].original_address == default_id) && (!handler_id || (handler_id == adb_handler[i].handler_id) || try_handler_change(i, handler_id))) { - if (adb_handler[i].handler != 0) { + if (adb_handler[i].handler) { pr_err("Two handlers for ADB device %d\n", default_id); continue; @@ -673,7 +673,7 @@ static int adb_open(struct inode *inode, struct file *file) goto out; } state = kmalloc(sizeof(struct adbdev_state), GFP_KERNEL); - if (state == 0) { + if (!state) { ret = -ENOMEM; goto out; } -- GitLab From 88316944c3b3aa3ce3249c51689ef1621049df9d Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 10:42:33 +0800 Subject: [PATCH 333/694] macintosh/windfarm_pm91: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. 
Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647484953-15249-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/windfarm_pm91.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c index c8535855360d1..120a9cfba0c54 100644 --- a/drivers/macintosh/windfarm_pm91.c +++ b/drivers/macintosh/windfarm_pm91.c @@ -150,7 +150,7 @@ static void wf_smu_create_cpu_fans(void) /* First, locate the PID params in SMU SBD */ hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); - if (hdr == 0) { + if (!hdr) { printk(KERN_WARNING "windfarm: CPU PID fan config not found " "max fan speed\n"); goto fail; -- GitLab From a823307bf0a3b79b27eea916bf6499ba4377cdf9 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 10:57:02 +0800 Subject: [PATCH 334/694] macintosh/windfarm_pm121: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647485822-16717-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/windfarm_pm121.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c index 36312f163aac8..82500417ebeec 100644 --- a/drivers/macintosh/windfarm_pm121.c +++ b/drivers/macintosh/windfarm_pm121.c @@ -651,7 +651,7 @@ static void pm121_create_cpu_fans(void) /* First, locate the PID params in SMU SBD */ hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); - if (hdr == 0) { + if (!hdr) { printk(KERN_WARNING "pm121: CPU PID fan config not found.\n"); goto fail; } @@ -970,7 +970,7 @@ static int pm121_init_pm(void) const struct smu_sdbp_header *hdr; hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); - if (hdr != 0) { + if (hdr) { struct smu_sdbp_sensortree *st = (struct smu_sdbp_sensortree *)&hdr[1]; pm121_mach_model = st->model_id; -- GitLab From fc21ed8f26d980428f9b4e08e0fb72c7f7ffc9b8 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 17 Mar 2022 17:24:49 +0800 Subject: [PATCH 335/694] macintosh/macio-adb: Fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. 
Signed-off-by: Haowen Bai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1647509089-4280-1-git-send-email-baihaowen@meizu.com --- drivers/macintosh/macio-adb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c index 9b63bd2551c63..3721402582b4f 100644 --- a/drivers/macintosh/macio-adb.c +++ b/drivers/macintosh/macio-adb.c @@ -100,7 +100,7 @@ int macio_init(void) unsigned int irq; adbs = of_find_compatible_node(NULL, "adb", "chrp,adb0"); - if (adbs == 0) + if (!adbs) return -ENXIO; if (of_address_to_resource(adbs, 0, &r)) { @@ -183,7 +183,7 @@ static int macio_send_request(struct adb_request *req, int sync) req->reply_len = 0; spin_lock_irqsave(&macio_lock, flags); - if (current_req != 0) { + if (current_req) { last_req->next = req; last_req = req; } else { @@ -213,7 +213,8 @@ static irqreturn_t macio_adb_interrupt(int irq, void *arg) spin_lock(&macio_lock); if (in_8(&adb->intr.r) & TAG) { handled = 1; - if ((req = current_req) != 0) { + req = current_req; + if (req) { /* put the current request in */ for (i = 0; i < req->nbytes; ++i) out_8(&adb->data[i].r, req->data[i]); -- GitLab From 27f9690a81d7acf185b78be8d03d4b3a243116b1 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 21 Mar 2022 20:28:00 +1100 Subject: [PATCH 336/694] macintosh/via-pmu: Avoid compiler warnings when CONFIG_PROC_FS is disabled drivers/macintosh/via-pmu.c:897:12: warning: 'pmu_battery_proc_show' defined but not used [-Wunused-function] static int pmu_battery_proc_show(struct seq_file *m, void *v) ^~~~~~~~~~~~~~~~~~~~~ drivers/macintosh/via-pmu.c:871:12: warning: 'pmu_irqstats_proc_show' defined but not used [-Wunused-function] static int pmu_irqstats_proc_show(struct seq_file *m, void *v) ^~~~~~~~~~~~~~~~~~~~~~ drivers/macintosh/via-pmu.c:860:12: warning: 'pmu_info_proc_show' defined but not used [-Wunused-function] static int pmu_info_proc_show(struct seq_file *m, void *v) ^~~~~~~~~~~~~~~~~~ Add some #ifdefs to avoid unused code warnings when CONFIG_PROC_FS is disabled. 
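Illustration (not part of this patch): the general shape of the guard, with made-up names. When CONFIG_PROC_FS is disabled the proc registration helpers compile away, so show functions referenced only from those calls become unused; wrapping them in the same #ifdef keeps the build warning-free.

#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#ifdef CONFIG_PROC_FS
static int foo_info_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example\n");
	return 0;
}
#endif

static int __init foo_init(void)
{
#ifdef CONFIG_PROC_FS
	proc_create_single("driver/foo", 0444, NULL, foo_info_proc_show);
#endif
	return 0;
}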
Reported-by: Randy Dunlap Suggested-by: Christophe Leroy Signed-off-by: Finn Thain Tested-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c11c0770fc4ec7e80a4b2e0ffce1055b792cfdb.1647854880.git.fthain@linux-m68k.org --- drivers/macintosh/via-pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 49657962d8920..e0cb8daf4f082 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -203,9 +203,11 @@ static int init_pmu(void); static void pmu_start(void); static irqreturn_t via_pmu_interrupt(int irq, void *arg); static irqreturn_t gpio1_interrupt(int irq, void *arg); +#ifdef CONFIG_PROC_FS static int pmu_info_proc_show(struct seq_file *m, void *v); static int pmu_irqstats_proc_show(struct seq_file *m, void *v); static int pmu_battery_proc_show(struct seq_file *m, void *v); +#endif static void pmu_pass_intr(unsigned char *data, int len); static const struct proc_ops pmu_options_proc_ops; @@ -852,6 +854,7 @@ query_battery_state(void) 2, PMU_SMART_BATTERY_STATE, pmu_cur_battery+1); } +#ifdef CONFIG_PROC_FS static int pmu_info_proc_show(struct seq_file *m, void *v) { seq_printf(m, "PMU driver version : %d\n", PMU_DRIVER_VERSION); @@ -972,6 +975,7 @@ static const struct proc_ops pmu_options_proc_ops = { .proc_release = single_release, .proc_write = pmu_options_proc_write, }; +#endif #ifdef CONFIG_ADB /* Send an ADB command */ -- GitLab From a0542d2c45a64162e63ad2d80684e57de0566271 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Thu, 16 Jun 2022 19:04:24 +0200 Subject: [PATCH 337/694] macintosh/via-pmu-backlight: Use backlight helper backlight_properties.fb_blank is deprecated. The states it represents are handled by other properties; but instead of accessing those properties directly, drivers should use the helpers provided by backlight.h. Instead of retrieving the backlight brightness in struct backlight_properties manually, and then checking whether the backlight should be on at all, use backlight_get_brightness() which does all this and insulates this from future changes. Signed-off-by: Stephen Kitt Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220616170425.1346081-1-steve@sk2.org --- drivers/macintosh/via-pmu-backlight.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/macintosh/via-pmu-backlight.c b/drivers/macintosh/via-pmu-backlight.c index 2194016122d2f..c2d87e7fa85be 100644 --- a/drivers/macintosh/via-pmu-backlight.c +++ b/drivers/macintosh/via-pmu-backlight.c @@ -71,12 +71,7 @@ static int pmu_backlight_get_level_brightness(int level) static int __pmu_backlight_update_status(struct backlight_device *bd) { struct adb_request req; - int level = bd->props.brightness; - - - if (bd->props.power != FB_BLANK_UNBLANK || - bd->props.fb_blank != FB_BLANK_UNBLANK) - level = 0; + int level = backlight_get_brightness(bd); if (level > 0) { int pmulevel = pmu_backlight_get_level_brightness(level); -- GitLab From 2dfcace75e1e1dfbd89af63fce1bfe8aebe38427 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Thu, 7 Jul 2022 09:53:52 +0800 Subject: [PATCH 338/694] macintosh/ams/ams: Add header file macro definition Add header file macro definition. 
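Illustration (not part of this patch): the include-guard pattern that this patch and the next one add, shown with a placeholder macro name.

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _EXAMPLE_H
#define _EXAMPLE_H

/* declarations go here; the guard makes double inclusion harmless */

#endif /* _EXAMPLE_H */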
Signed-off-by: Li zeming [mpe: Add endif comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220707015352.3391-1-zeming@nfschina.com --- drivers/macintosh/ams/ams.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/macintosh/ams/ams.h b/drivers/macintosh/ams/ams.h index 935bdd9cd9a63..2c159c8844c19 100644 --- a/drivers/macintosh/ams/ams.h +++ b/drivers/macintosh/ams/ams.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _AMS_H +#define _AMS_H + #include #include #include @@ -69,3 +72,5 @@ extern int ams_i2c_init(struct device_node *np); extern int ams_input_init(void); extern void ams_input_exit(void); + +#endif /* _AMS_H */ -- GitLab From e3e528d29d13c01289f382a0d3ddb5312ac3dae3 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Thu, 7 Jul 2022 09:59:49 +0800 Subject: [PATCH 339/694] macintosh/windfarm_pid: Add header file macro definition I think the header file could avoid redefinition errors at compile time by adding macro definitions. Signed-off-by: Li zeming [mpe: Add endif comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220707015949.3733-1-zeming@nfschina.com --- drivers/macintosh/windfarm_pid.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/macintosh/windfarm_pid.h b/drivers/macintosh/windfarm_pid.h index 83f747dbeafcc..335613a200fb0 100644 --- a/drivers/macintosh/windfarm_pid.h +++ b/drivers/macintosh/windfarm_pid.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _WINDFARM_PID_H +#define _WINDFARM_PID_H + /* * Windfarm PowerMac thermal control. Generic PID helpers * @@ -82,3 +85,5 @@ struct wf_cpu_pid_state { extern void wf_cpu_pid_init(struct wf_cpu_pid_state *st, struct wf_cpu_pid_param *param); extern s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 power, s32 temp); + +#endif /* _WINDFARM_PID_H */ -- GitLab From 3aa16303dc98b7b8baa2adbc3210fd513ec0e810 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 16 Sep 2022 22:16:38 +0800 Subject: [PATCH 340/694] macintosh: Switch to use for_each_child_of_node() macro Use for_each_child_of_node() macro instead of open coding it. No functional change. 
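Illustration (not part of this patch): a minimal sketch of the iterator being adopted, with made-up names. for_each_child_of_node() manages the child references as it walks (each step puts the previous child and gets the next), so code that breaks out early still holds a reference to the matched node and remains responsible for of_node_put() on it, exactly as with the open-coded of_get_next_child() loop.

#include <linux/of.h>

static struct device_node *find_rpm_fans(struct device_node *smu)
{
	struct device_node *fans;

	/* Open-coded form being replaced:
	 *	for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;)
	 */
	for_each_child_of_node(smu, fans)
		if (of_node_name_eq(fans, "rpm-fans"))
			break;		/* still holds a reference to 'fans' */

	return fans;	/* NULL if not found; otherwise the caller must of_node_put() it */
}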
Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220916141638.685575-1-yangyingliang@huawei.com --- drivers/macintosh/windfarm_smu_controls.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index e9957ad49a2af..bdd92b27da2ae 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -266,12 +266,11 @@ static int __init smu_controls_init(void) return -ENODEV; /* Look for RPM fans */ - for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + for_each_child_of_node(smu, fans) if (of_node_name_eq(fans, "rpm-fans") || of_device_is_compatible(fans, "smu-rpm-fans")) break; - for (fan = NULL; - fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + for_each_child_of_node(fans, fan) { struct smu_fan_control *fct; fct = smu_fan_create(fan, 0); @@ -286,11 +285,10 @@ static int __init smu_controls_init(void) /* Look for PWM fans */ - for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;) + for_each_child_of_node(smu, fans) if (of_node_name_eq(fans, "pwm-fans")) break; - for (fan = NULL; - fans && (fan = of_get_next_child(fans, fan)) != NULL;) { + for_each_child_of_node(fans, fan) { struct smu_fan_control *fct; fct = smu_fan_create(fan, 1); -- GitLab From 5ca86eae55a2f006e6c1edd2029b2cacb6979515 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 4 Nov 2022 11:25:51 +0800 Subject: [PATCH 341/694] macintosh: fix possible memory leak in macio_add_one_device() Afer commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of device is allocated dynamically. It needs to be freed when of_device_register() fails. Call put_device() to give up the reference that's taken in device_initialize(), so that it can be freed in kobject_cleanup() when the refcount hits 0. macio device is freed in macio_release_dev(), so the kfree() can be removed. Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221104032551.1075335-1-yangyingliang@huawei.com --- drivers/macintosh/macio_asic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index 1ec1e5984563f..3bc1f374e6577 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -424,7 +424,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, if (of_device_register(&dev->ofdev) != 0) { printk(KERN_DEBUG"macio: device registration error for %s!\n", dev_name(&dev->ofdev.dev)); - kfree(dev); + put_device(&dev->ofdev.dev); return NULL; } -- GitLab From dbaa3105736d4d73063ea0a3b01cd7fafce924e6 Mon Sep 17 00:00:00 2001 From: Xie Shaowen Date: Tue, 2 Aug 2022 15:41:48 +0800 Subject: [PATCH 342/694] macintosh/macio-adb: check the return value of ioremap() The function ioremap() in macio_init() can fail, so its return value should be checked. 
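Illustration (not part of this patch): the general "release what was acquired before every early exit" shape behind this and several other fixes in the series, sketched with made-up names; the hunk below adds the missing of_node_put() and -ENOMEM return after ioremap().

#include <linux/errno.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>

/* 'np' is a node reference handed in from a successful lookup. */
static int example_map(struct device_node *np)
{
	struct resource r;
	void __iomem *regs;

	if (of_address_to_resource(np, 0, &r)) {
		of_node_put(np);
		return -ENXIO;
	}

	regs = ioremap(r.start, resource_size(&r));
	if (!regs) {
		of_node_put(np);	/* the path the fix adds a release to */
		return -ENOMEM;
	}

	/* ... use the mapping ... */
	iounmap(regs);
	of_node_put(np);
	return 0;
}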
Fixes: 36874579dbf4c ("[PATCH] powerpc: macio-adb build fix") Reported-by: Hacash Robot Signed-off-by: Xie Shaowen Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220802074148.3213659-1-studentxswpy@163.com --- drivers/macintosh/macio-adb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c index 3721402582b4f..55a9f8c3a150e 100644 --- a/drivers/macintosh/macio-adb.c +++ b/drivers/macintosh/macio-adb.c @@ -108,6 +108,10 @@ int macio_init(void) return -ENXIO; } adb = ioremap(r.start, sizeof(struct adb_regs)); + if (!adb) { + of_node_put(adbs); + return -ENOMEM; + } out_8(&adb->ctrl.r, 0); out_8(&adb->intr.r, 0); -- GitLab From 5836947613ef33d311b4eff6a32d019580a214f5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 29 Jan 2022 08:16:04 +0100 Subject: [PATCH 343/694] powerpc/52xx: Fix a resource leak in an error handling path The error handling path of mpc52xx_lpbfifo_probe() has a request_irq() that is not balanced by a corresponding free_irq(). Add the missing call, as already done in the remove function. Fixes: 3c9059d79f5e ("powerpc/5200: add LocalPlus bus FIFO device driver") Signed-off-by: Christophe JAILLET Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/dec1496d46ccd5311d0f6e9f9ca4238be11bf6a6.1643440531.git.christophe.jaillet@wanadoo.fr --- arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 48038aaedbd36..2875c206ac0f8 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -531,6 +531,7 @@ static int mpc52xx_lpbfifo_probe(struct platform_device *op) err_bcom_rx_irq: bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task); err_bcom_rx: + free_irq(lpbfifo.irq, &lpbfifo); err_irq: iounmap(lpbfifo.regs); lpbfifo.regs = NULL; -- GitLab From e75d07bd8303588c33e6f1f180a9081fb58c872e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Mar 2022 10:29:50 +0100 Subject: [PATCH 344/694] powerpc: Remove find_current_mm_pte() Last usage of find_current_mm_pte() was removed by commit 15759cb054ef ("powerpc/perf/callchain: Use __get_user_pages_fast in read_user_stack_slow") Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ec79f462a3bfa8365b7df505e574d5d85246bc68.1646818177.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/pte-walk.h | 25 ------------------------- arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 714a35f0d425b..73c22c579a791 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -60,29 +60,4 @@ static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr) return pa; } -/* - * This is what we should always use. Any other lockless page table lookup needs - * careful audit against THP split. 
- */ -static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *hshift) -{ - pte_t *pte; - - VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); - VM_WARN(pgdir != current->mm->pgd, - "%s lock less page table lookup called on wrong mm\n", __func__); - pte = __find_linux_pte(pgdir, ea, is_thp, hshift); - -#if defined(CONFIG_DEBUG_VM) && \ - !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)) - /* - * We should not find huge page if these configs are not enabled. - */ - if (hshift) - WARN_ON(*hshift); -#endif - return pte; -} - #endif /* _ASM_POWERPC_PTE_WALK_H */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index f6151a5892982..85c84e89e3eaf 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -100,14 +100,14 @@ static void do_serialize(void *arg) } /* - * Serialize against find_current_mm_pte which does lock-less + * Serialize against __find_linux_pte() which does lock-less * lookup in page tables with local interrupts disabled. For huge pages * it casts pmd_t to pte_t. Since format of pte_t is different from * pmd_t we want to prevent transit from pmd pointing to page table * to pmd pointing to huge page (and back) while interrupts are disabled. * We clear pmd to possibly replace it with page table pointer in * different code paths. So make sure we wait for the parallel - * find_current_mm_pte to finish. + * __find_linux_pte() to finish. */ void serialize_against_pte_lookup(struct mm_struct *mm) { -- GitLab From 4562bffb83b88e61ea9c9912e50efbd5a941f0b3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Nov 2022 22:19:18 -0800 Subject: [PATCH 345/694] powerpc/mpc52xx_lpbfifo: fix all kernel-doc warnings Fix multiple kernel-doc warnings in mpc52xx_lpbfifo.c: arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c:377: warning: expecting prototype for mpc52xx_lpbfifo_bcom_poll(). 
Prototype was for mpc52xx_lpbfifo_poll() instead mpc52xx_lpbfifo.c:221: warning: No description found for return value of 'mpc52xx_lpbfifo_irq' mpc52xx_lpbfifo.c:327: warning: No description found for return value of 'mpc52xx_lpbfifo_bcom_irq' mpc52xx_lpbfifo.c:398: warning: No description found for return value of 'mpc52xx_lpbfifo_submit' mpc52xx_lpbfifo.c:64: warning: Function parameter or member 'req' not described in 'mpc52xx_lpbfifo_kick' mpc52xx_lpbfifo.c:220: warning: contents before sections mpc52xx_lpbfifo.c:223: warning: Function parameter or member 'irq' not described in 'mpc52xx_lpbfifo_irq' mpc52xx_lpbfifo.c:223: warning: Function parameter or member 'dev_id' not described in 'mpc52xx_lpbfifo_irq' mpc52xx_lpbfifo.c:328: warning: contents before sections mpc52xx_lpbfifo.c:331: warning: Function parameter or member 'irq' not described in 'mpc52xx_lpbfifo_bcom_irq' mpc52xx_lpbfifo.c:331: warning: Function parameter or member 'dev_id' not described in 'mpc52xx_lpbfifo_bcom_irq' Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221124061918.1967-1-rdunlap@infradead.org --- arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 2875c206ac0f8..6d1dd6e874786 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -59,6 +59,8 @@ static struct mpc52xx_lpbfifo lpbfifo; /** * mpc52xx_lpbfifo_kick - Trigger the next block of data to be transferred + * + * @req: Pointer to request structure */ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) { @@ -178,6 +180,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) /** * mpc52xx_lpbfifo_irq - IRQ handler for LPB FIFO + * @irq: IRQ number to be handled + * @dev_id: device ID cookie * * On transmit, the dma completion irq triggers before the fifo completion * triggers. Handle the dma completion here instead of the LPB FIFO Bestcomm @@ -216,6 +220,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) * or nested spinlock condition. The out path is non-trivial, so * extra fiddling is done to make sure all paths lead to the same * outbound code. + * + * Return: irqreturn code (%IRQ_HANDLED) */ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) { @@ -320,8 +326,12 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) /** * mpc52xx_lpbfifo_bcom_irq - IRQ handler for LPB FIFO Bestcomm task + * @irq: IRQ number to be handled + * @dev_id: device ID cookie * * Only used when receiving data. + * + * Return: irqreturn code (%IRQ_HANDLED) */ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) { @@ -372,7 +382,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) } /** - * mpc52xx_lpbfifo_bcom_poll - Poll for DMA completion + * mpc52xx_lpbfifo_poll - Poll for DMA completion */ void mpc52xx_lpbfifo_poll(void) { @@ -393,6 +403,8 @@ EXPORT_SYMBOL(mpc52xx_lpbfifo_poll); /** * mpc52xx_lpbfifo_submit - Submit an LPB FIFO transfer request. 
* @req: Pointer to request structure + * + * Return: %0 on success, -errno code on error */ int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req) { -- GitLab From 932c6dea4f32f7d71488137c475b60a77e56bb2a Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Wed, 13 Apr 2022 06:55:07 -0400 Subject: [PATCH 346/694] powerpc/xive: remove unused parameter The parameter xc to xive_cleanup_single_escalation() is unused, so we can remove it. Signed-off-by: Deming Wang [mpe: Reword change log, unwrap lines < 90 columns] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220413105507.1729-1-wangdeming@inspur.com --- arch/powerpc/kvm/book3s_xive.c | 6 ++---- arch/powerpc/kvm/book3s_xive.h | 3 +-- arch/powerpc/kvm/book3s_xive_native.c | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 4ca23644f7525..d64b2dcc0e7f9 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1785,8 +1785,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) * stale_p (because it has no easy way to address it). Hence we have * to adjust stale_p before shutting down the interrupt. */ -void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, - struct kvmppc_xive_vcpu *xc, int irq) +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) { struct irq_data *d = irq_get_irq_data(irq); struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); @@ -1827,8 +1826,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { if (xc->esc_virq[i]) { if (kvmppc_xive_has_single_escalation(xc->xive)) - xive_cleanup_single_escalation(vcpu, xc, - xc->esc_virq[i]); + xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 1e48f72e8aa5d..62bf39f537839 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -299,8 +299,7 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, bool single_escalation); struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); -void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, - struct kvmppc_xive_vcpu *xc, int irq); +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq); int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5271c33fe79e4..4f566bea5e10f 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -93,8 +93,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) /* Free the escalation irq */ if (xc->esc_virq[i]) { if (kvmppc_xive_has_single_escalation(xc->xive)) - xive_cleanup_single_escalation(vcpu, xc, - xc->esc_virq[i]); + xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); -- GitLab From 37195edebf479b94f1e20c2a83a29e4beebe7ff5 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:11:32 +0200 Subject: [PATCH 347/694] cxl: fix typo in comment Spelling mistake 
(triple letters) in comment. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220521111145.81697-82-Julia.Lawall@inria.fr --- include/misc/cxl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 0410412de16b0..d8044299d654d 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -30,7 +30,7 @@ unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev); /* * Context lifetime overview: * - * An AFU context may be inited and then started and stoppped multiple times + * An AFU context may be inited and then started and stopped multiple times * before it's released. ie. * - cxl_dev_context_init() * - cxl_start_context() -- GitLab From 1d09697ff22908ae487fc8c4fbde1811732be523 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 5 Jun 2022 10:00:38 +0400 Subject: [PATCH 348/694] cxl: Fix refcount leak in cxl_calc_capp_routing of_get_next_parent() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. This function only calls of_node_put() in normal path, missing it in the error path. Add missing of_node_put() to avoid refcount leak. Fixes: f24be42aab37 ("cxl: Add psl9 specific code") Signed-off-by: Miaoqian Lin Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220605060038.62217-1-linmq006@gmail.com --- drivers/misc/cxl/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 3de0aea62ade4..62385a529d869 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -387,6 +387,7 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, rc = get_phb_index(np, phb_index); if (rc) { pr_err("cxl: invalid phb index\n"); + of_node_put(np); return rc; } -- GitLab From f949ccee1dde970bc77dc871b4f0b5e651577344 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:54:39 +0800 Subject: [PATCH 349/694] cxl: fix possible null-ptr-deref in cxl_guest_init_afu|adapter() If device_register() fails in cxl_register_afu|adapter(), the device is not added, device_unregister() can not be called in the error path, otherwise it will cause a null-ptr-deref because of removing not added device. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So split device_unregister() into device_del() and put_device(), then goes to put dev when register fails. Fixes: 14baf4d9c739 ("cxl: Add guest-specific code") Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221111145440.2426970-1-yangyingliang@huawei.com --- drivers/misc/cxl/guest.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 375f692ae9d68..fb95a2d5cef48 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -965,10 +965,10 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n * if it returns an error! 
*/ if ((rc = cxl_register_afu(afu))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_afu_add(afu))) - goto err_put1; + goto err_del_dev; /* * pHyp doesn't expose the programming models supported by the @@ -984,7 +984,7 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n afu->modes_supported = CXL_MODE_DIRECTED; if ((rc = cxl_afu_select_best_mode(afu))) - goto err_put2; + goto err_remove_sysfs; adapter->afu[afu->slice] = afu; @@ -1004,10 +1004,12 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n return 0; -err_put2: +err_remove_sysfs: cxl_sysfs_afu_remove(afu); -err_put1: - device_unregister(&afu->dev); +err_del_dev: + device_del(&afu->dev); +err_put_dev: + put_device(&afu->dev); free = false; guest_release_serr_irq(afu); err2: @@ -1141,18 +1143,20 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic * even if it returns an error! */ if ((rc = cxl_register_adapter(adapter))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_adapter_add(adapter))) - goto err_put1; + goto err_del_dev; /* release the context lock as the adapter is configured */ cxl_adapter_context_unlock(adapter); return adapter; -err_put1: - device_unregister(&adapter->dev); +err_del_dev: + device_del(&adapter->dev); +err_put_dev: + put_device(&adapter->dev); free = false; cxl_guest_remove_chardev(adapter); err1: -- GitLab From 8bf03f557d6c6e108cf47bea32f4a68e276e1157 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:54:40 +0800 Subject: [PATCH 350/694] cxl: fix possible null-ptr-deref in cxl_pci_init_afu|adapter() If device_register() fails in cxl_pci_afu|adapter(), the device is not added, device_unregister() can not be called in the error path, otherwise it will cause a null-ptr-deref because of removing not added device. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So split device_unregister() into device_del() and put_device(), then goes to put dev when register fails. Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for userspace access") Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221111145440.2426970-2-yangyingliang@huawei.com --- drivers/misc/cxl/pci.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 62385a529d869..0ff944860dda9 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1165,10 +1165,10 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) * if it returns an error! */ if ((rc = cxl_register_afu(afu))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_afu_add(afu))) - goto err_put1; + goto err_del_dev; adapter->afu[afu->slice] = afu; @@ -1177,10 +1177,12 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) return 0; -err_put1: +err_del_dev: + device_del(&afu->dev); +err_put_dev: pci_deconfigure_afu(afu); cxl_debugfs_afu_remove(afu); - device_unregister(&afu->dev); + put_device(&afu->dev); return rc; err_free_native: @@ -1668,23 +1670,25 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) * even if it returns an error! 
*/ if ((rc = cxl_register_adapter(adapter))) - goto err_put1; + goto err_put_dev; if ((rc = cxl_sysfs_adapter_add(adapter))) - goto err_put1; + goto err_del_dev; /* Release the context lock as adapter is configured */ cxl_adapter_context_unlock(adapter); return adapter; -err_put1: +err_del_dev: + device_del(&adapter->dev); +err_put_dev: /* This should mirror cxl_remove_adapter, except without the * sysfs parts */ cxl_debugfs_adapter_remove(adapter); cxl_deconfigure_adapter(adapter); - device_unregister(&adapter->dev); + put_device(&adapter->dev); return ERR_PTR(rc); err_release: -- GitLab From 295faa17722a11cac8dbf51e4c9f9405a5e07ef1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 11 Nov 2022 22:59:29 +0800 Subject: [PATCH 351/694] ocxl: fix possible name leak in ocxl_file_register_afu() If device_register() returns error in ocxl_file_register_afu(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and info is freed in info_release(). Fixes: 75ca758adbaf ("ocxl: Create a clear delineation between ocxl backend & frontend") Signed-off-by: Yang Yingliang Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221111145929.2429271-1-yangyingliang@huawei.com --- drivers/misc/ocxl/file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index d46dba2df5a10..452d5777a0e4c 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -541,8 +541,11 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) goto err_put; rc = device_register(&info->dev); - if (rc) - goto err_put; + if (rc) { + free_minor(info); + put_device(&info->dev); + return rc; + } rc = ocxl_sysfs_register_afu(info); if (rc) -- GitLab From 5f58cad1e4c65bebee34292696c6d2105eeb2027 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 21 Nov 2022 23:43:39 +0800 Subject: [PATCH 352/694] ocxl: fix pci device refcount leak when calling get_function_0() get_function_0() calls pci_get_domain_bus_and_slot(), as comment says, it returns a pci device with refcount increment, so after using it, pci_dev_put() needs be called. Get the device reference when get_function_0() is not called, so pci_dev_put() can be called in the error path and callers unconditionally. And add comment above get_dvsec_vendor0() to tell callers to call pci_dev_put(). Fixes: 87db7579ebd5 ("ocxl: control via sysfs whether the FPGA is reloaded on a link reset") Suggested-by: Andrew Donnellan Signed-off-by: Yang Yingliang Acked-by: Frederic Barrat Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221121154339.4088935-1-yangyingliang@huawei.com --- drivers/misc/ocxl/config.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c index e401a51596b9c..92ab49705f645 100644 --- a/drivers/misc/ocxl/config.c +++ b/drivers/misc/ocxl/config.c @@ -193,6 +193,18 @@ static int read_dvsec_vendor(struct pci_dev *dev) return 0; } +/** + * get_dvsec_vendor0() - Find a related PCI device (function 0) + * @dev: PCI device to match + * @dev0: The PCI device (function 0) found + * @out_pos: The position of PCI device (function 0) + * + * Returns 0 on success, negative on failure. 
+ * + * NOTE: If it's successful, the reference of dev0 is increased, + * so after using it, the callers must call pci_dev_put() to give + * up the reference. + */ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, int *out_pos) { @@ -202,10 +214,14 @@ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, dev = get_function_0(dev); if (!dev) return -1; + } else { + dev = pci_dev_get(dev); } pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); - if (!pos) + if (!pos) { + pci_dev_put(dev); return -1; + } *dev0 = dev; *out_pos = pos; return 0; @@ -222,6 +238,7 @@ int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val) pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, &reset_reload); + pci_dev_put(dev0); *val = !!(reset_reload & BIT(0)); return 0; } @@ -243,6 +260,7 @@ int ocxl_config_set_reset_reload(struct pci_dev *dev, int val) reset_reload &= ~BIT(0); pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, reset_reload); + pci_dev_put(dev0); return 0; } -- GitLab From 14b5d59a261b1947db287b3b52f4bb1dc496dede Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Fri, 1 Jul 2022 05:45:53 -0400 Subject: [PATCH 353/694] powerpc/pseries: Fix formatting to make code look more beautiful Operators should be separated by spaces in tce_buildmulti_pSeriesLP() Signed-off-by: Deming Wang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220701094553.1722-1-wangdeming@inspur.com --- arch/powerpc/platforms/pseries/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 561adac690229..c74b71d4733d4 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -248,7 +248,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, * Set up the page with TCE data, looping through and setting * the values. */ - limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); + limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE); for (l = 0; l < limit; l++) { tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); -- GitLab From 7af82ff90a2b0690c2c45818fcce4c4ac3b187f3 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 9 Aug 2022 16:24:25 +0530 Subject: [PATCH 354/694] powerpc/ftrace: Ignore weak functions Extend commit b39181f7c6907d ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid adding weak function") to ppc32 and ppc64 -mprofile-kernel by defining FTRACE_MCOUNT_MAX_OFFSET. For ppc64 -mprofile-kernel ABI, we can have two instructions at function entry for TOC setup followed by 'mflr r0' and 'bl _mcount'. So, the mcount location is at most the 4th instruction in a function. For ppc32, mcount location is always the 3rd instruction in a function, preceded by 'mflr r0' and 'stw r0,4(r1)'. With this patch, and with ppc64le_guest_defconfig and some ftrace/bpf config items enabled: # grep __ftrace_invalid_address available_filter_functions | wc -l 79 Signed-off-by: Naveen N. 
Rao Acked-by: Steven Rostedt (Google) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220809105425.424045-1-naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/ftrace.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 3cee7115441b4..ade406dc6504e 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -10,6 +10,13 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +/* Ignore unused weak functions which will have larger offsets */ +#ifdef CONFIG_MPROFILE_KERNEL +#define FTRACE_MCOUNT_MAX_OFFSET 12 +#elif defined(CONFIG_PPC32) +#define FTRACE_MCOUNT_MAX_OFFSET 8 +#endif + #ifndef __ASSEMBLY__ extern void _mcount(void); -- GitLab From addebe1cfa71eb29caa9d5c6bc719171e4e76414 Mon Sep 17 00:00:00 2001 From: Nicholas Miehlbradt Date: Wed, 10 Aug 2022 04:03:21 +0000 Subject: [PATCH 355/694] docs: powerpc: add POWER9 and POWER10 to CPU families Add POWER9 and POWER10 to CPU families and list Radix MMU. Signed-off-by: Nicholas Miehlbradt Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220810040321.375396-1-nicholas@linux.ibm.com --- Documentation/powerpc/cpu_families.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/powerpc/cpu_families.rst b/Documentation/powerpc/cpu_families.rst index 9b84e045e7134..eb7e60649b433 100644 --- a/Documentation/powerpc/cpu_families.rst +++ b/Documentation/powerpc/cpu_families.rst @@ -10,6 +10,7 @@ Book3S (aka sPAPR) ------------------ - Hash MMU (except 603 and e300) +- Radix MMU (POWER9 and later) - Software loaded TLB (603 and e300) - Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600) - Mix of 32 & 64 bit:: @@ -100,6 +101,18 @@ Book3S (aka sPAPR) v +--------------+ | POWER8 | + +--------------+ + | + | + v + +--------------+ + | POWER9 | + +--------------+ + | + | + v + +--------------+ + | POWER10 | +--------------+ -- GitLab From ff8fae94e26f5cd2779ceda0ee6d714a10501abd Mon Sep 17 00:00:00 2001 From: Shaomin Deng Date: Sun, 4 Sep 2022 11:51:02 -0400 Subject: [PATCH 356/694] drivers/ps3: Fix double word in comments Drop the repeated word "when" in comments. Signed-off-by: Shaomin Deng Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220904155102.26957-1-dengshaomin@cdjrlc.com --- drivers/ps3/ps3-lpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index 65512b6cc6fdc..200ad8751860a 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -1066,7 +1066,7 @@ EXPORT_SYMBOL_GPL(ps3_disable_pm_interrupts); * instance, specified by one of enum ps3_lpm_tb_type. * @tb_cache: Optional user supplied buffer to use as the trace buffer cache. * If NULL, the driver will allocate and manage an internal buffer. - * Unused when when @tb_type is PS3_LPM_TB_TYPE_NONE. + * Unused when @tb_type is PS3_LPM_TB_TYPE_NONE. * @tb_cache_size: The size in bytes of the user supplied @tb_cache buffer. * Unused when @tb_cache is NULL or @tb_type is PS3_LPM_TB_TYPE_NONE. */ -- GitLab From b86cf14f240e002e001fd4f2bf49114c7836fd5c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 9 Sep 2022 15:23:12 +1000 Subject: [PATCH 357/694] powerpc: add compile-time support for lbarx, lharx ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lharx. 
Add a compile option that allows code to use it, and add support in cmpxchg and xchg 8 and 16 bit values without shifting and masking. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220909052312.63916-1-npiggin@gmail.com --- arch/powerpc/Kconfig | 3 + arch/powerpc/include/asm/cmpxchg.h | 231 ++++++++++++++++++++++++- arch/powerpc/lib/sstep.c | 21 +-- arch/powerpc/platforms/Kconfig.cputype | 5 + 4 files changed, 249 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 699df27b0e2fc..4fd4924f6d50e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -293,6 +293,9 @@ config PPC_BARRIER_NOSPEC default y depends on PPC_BOOK3S_64 || PPC_E500 +config PPC_HAS_LBARX_LHARX + bool + config EARLY_PRINTK bool default y diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 05f246c0e36eb..d0ea0571e79ab 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ * the previous value stored there. */ +#ifndef CONFIG_PPC_HAS_LBARX_LHARX XCHG_GEN(u8, _local, "memory"); XCHG_GEN(u8, _relaxed, "cc"); XCHG_GEN(u16, _local, "memory"); XCHG_GEN(u16, _relaxed, "cc"); +#else +static __always_inline unsigned long +__xchg_u8_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lbarx %0,0,%2 # __xchg_u8_local\n" +" stbcx. %3,0,%2 \n" +" bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned char *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u8_relaxed(u8 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lbarx %0,0,%2 # __xchg_u8_relaxed\n" +" stbcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lharx %0,0,%2 # __xchg_u16_local\n" +" sthcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned short *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_relaxed(u16 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lharx %0,0,%2 # __xchg_u16_relaxed\n" +" sthcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} +#endif static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ (unsigned long)_x_, sizeof(*(ptr))); \ }) + /* * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. 
*/ - +#ifndef CONFIG_PPC_HAS_LBARX_LHARX CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u8, _local, , , "memory"); CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); @@ -211,6 +278,168 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u16, _local, , , "memory"); CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); CMPXCHG_GEN(u16, _relaxed, , , "cc"); +#else +static __always_inline unsigned long +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: lbarx %0,0,%2 # __cmpxchg_u8\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b" + PPC_ATOMIC_EXIT_BARRIER + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_local\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_acquire\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: lharx %0,0,%2 # __cmpxchg_u16\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_local\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_relaxed\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_acquire\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. 
%4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} +#endif static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 398b5694aeb70..38158b77a8019 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -2284,15 +2284,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(STCX, 0, 4); break; -#ifdef __powerpc64__ - case 84: /* ldarx */ - op->type = MKOP(LARX, 0, 8); - break; - - case 214: /* stdcx. */ - op->type = MKOP(STCX, 0, 8); - break; - +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 52: /* lbarx */ op->type = MKOP(LARX, 0, 1); break; @@ -2308,6 +2300,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 726: /* sthcx. */ op->type = MKOP(STCX, 0, 2); break; +#endif +#ifdef __powerpc64__ + case 84: /* ldarx */ + op->type = MKOP(LARX, 0, 8); + break; + + case 214: /* stdcx. */ + op->type = MKOP(STCX, 0, 8); + break; case 276: /* lqarx */ if (!((rd & 1) || rd == ra || rd == rb)) @@ -3334,7 +3335,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) err = 0; val = 0; switch (size) { -#ifdef __powerpc64__ +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 1: __get_user_asmx(val, ea, err, "lbarx"); break; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 0c4eed9aea806..7bac213b4125d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -135,6 +135,7 @@ config GENERIC_CPU depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWERPC_CPU bool "Generic 32 bits powerpc" @@ -160,17 +161,20 @@ config POWER7_CPU depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWER8_CPU bool "POWER8" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWER9_CPU bool "POWER9" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER + select PPC_HAS_LBARX_LHARX config POWER10_CPU bool "POWER10" @@ -184,6 +188,7 @@ config E5500_CPU config E6500_CPU bool "Freescale e6500" depends on PPC64 && PPC_E500 + select PPC_HAS_LBARX_LHARX config 405_CPU bool "40x family" -- GitLab From d87a233717da400792fa601b29fa74a7d28e03c2 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sun, 11 Sep 2022 16:43:44 +0800 Subject: [PATCH 358/694] powerpc/pasemi: Add __init/__exit annotations to module init/exit funcs Add missing __init/__exit annotations to module init/exit funcs. 
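The annotations matter because .init.text can be freed once initialisation is done, and .exit.text can be discarded when the code is built in and can never be unloaded. The canonical shape is roughly (a generic sketch, not the gpio_mdio code itself):

  #include <linux/init.h>
  #include <linux/module.h>

  static int __init foo_init(void)
  {
          /* runs once at boot or module load; its memory can be reclaimed afterwards */
          return 0;
  }
  module_init(foo_init);

  static void __exit foo_exit(void)
  {
          /* can be discarded entirely when the driver is built in */
  }
  module_exit(foo_exit);

  MODULE_LICENSE("GPL");
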
Signed-off-by: Xiu Jianfeng Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220911084344.196353-1-xiujianfeng@huawei.com --- arch/powerpc/platforms/pasemi/gpio_mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index bf300167ad6bf..913b77b92cea1 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -294,7 +294,7 @@ static struct platform_driver gpio_mdio_driver = }, }; -static int gpio_mdio_init(void) +static int __init gpio_mdio_init(void) { struct device_node *np; @@ -314,7 +314,7 @@ static int gpio_mdio_init(void) } module_init(gpio_mdio_init); -static void gpio_mdio_exit(void) +static void __exit gpio_mdio_exit(void) { platform_driver_unregister(&gpio_mdio_driver); if (gpio_regs) -- GitLab From 2223552256dfc48435e0699dbe1e9b8d2cd56b06 Mon Sep 17 00:00:00 2001 From: Disha Goel Date: Fri, 16 Sep 2022 16:27:35 +0530 Subject: [PATCH 359/694] powerpc/kvm: Remove unused macros from asm-offset The kvm code was refactored to convert some of kvm assembly routines to C. This includes commits which moved code path for the kvm guest entry/exit for p7/8 from aseembly to C. As part of the code changes, usage of some of the macros were removed. But definitions still exist in the assembly files. Commits are listed below: Commit 2e1ae9cd56f8 ("KVM: PPC: Book3S HV: Implement radix prefetch workaround by disabling MMU") Commit 9769a7fd79b6 ("KVM: PPC: Book3S HV: Remove radix guest support from P7/8 path") Commit fae5c9f3664b ("KVM: PPC: Book3S HV: remove ISA v3.0 and v3.1 support from P7/8 path") Commit 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore in C") Many of the asm-offset macro definitions were missed to remove. Patch fixes by removing the unused macros. 
Signed-off-by: Disha Goel Reviewed-by: Nicholas Piggin Reviewed-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220916105736.268153-2-disgoel@linux.vnet.ibm.com --- arch/powerpc/kernel/asm-offsets.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4ce2a4aa39854..b4b661f631f5f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -418,21 +418,18 @@ int main(void) /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets); OFFSET(KVM_SDR1, kvm, arch.sdr1); OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); - OFFSET(KVM_RADIX, kvm, arch.radix); OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); - OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested); OFFSET(VCPU_CPU, kvm_vcpu, cpu); OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); #endif @@ -449,16 +446,12 @@ int main(void) OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0); OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0); - OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1); - OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); - OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); - OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); @@ -486,8 +479,6 @@ int main(void) OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr); OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop); OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); - OFFSET(VCPU_TID, kvm_vcpu, arch.tid); - OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr); OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); @@ -582,8 +573,6 @@ int main(void) HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); - HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); - HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); @@ -594,9 +583,6 @@ int main(void) HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); - HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]); - HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]); - HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]); HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); @@ -672,17 +658,6 @@ int main(void) OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); #endif -#ifdef CONFIG_KVM_XICS - DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu, - arch.xive_saved_state)); - 
DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu, - arch.xive_cam_word)); - DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed)); - DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on)); - DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr)); - DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr)); -#endif - #ifdef CONFIG_KVM_EXIT_TIMING OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); -- GitLab From 4ac9d3187cc7ccba25f76a3faef3e08a366f77b9 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 16 Sep 2022 16:27:36 +0530 Subject: [PATCH 360/694] powerpc/kvm: Remove unused references for MMCR3/SIER2/SIER3 registers Commit 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore in C") removed the PMU save/restore functions from assembly code and implemented these functions in C, for power9 and later platforms. After the code refactoring, Performance Monitoring Unit (PMU) registers became part of "p9_host_os_sprs" structure and now this structure is used to save/restore pmu host registers, for power9 and later platfroms. But we still have old unused registers references. Patch removes unused host_mmcr references for Monitor Mode Control Register 3 (MMCR3)/ Sampled Instruction Event Register 2 (SIER2)/ SIER3 registers from "struct kvmppc_host_state". Fixes: 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore in C") Signed-off-by: Kajol Jain Reviewed-by: Nicholas Piggin Reviewed-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220916105736.268153-3-disgoel@linux.vnet.ibm.com --- arch/powerpc/include/asm/kvm_book3s_asm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index c8882d9b86c27..a36797938620f 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -105,7 +105,7 @@ struct kvmppc_host_state { void __iomem *xive_tima_virt; u32 saved_xirr; u64 dabr; - u64 host_mmcr[10]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */ + u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ u32 host_pmc[8]; u64 host_purr; u64 host_spurr; -- GitLab From 1c4a4a4c8410be4a231a58b23e7a30923ff954ac Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 16 Sep 2022 15:15:04 +0100 Subject: [PATCH 361/694] powerpc/xmon: Fix -Wswitch-unreachable warning in bpt_cmds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with automatic stack variable initialization, GCC 12 complains about variables defined outside of switch case statements. Move the variable into the case that uses it, which silences the warning: arch/powerpc/xmon/xmon.c: In function ‘bpt_cmds’: arch/powerpc/xmon/xmon.c:1529:13: warning: statement will never be executed [-Wswitch-unreachable] 1529 | int mode; | ^~~~ Fixes: 09b6c1129f89 ("powerpc/xmon: Fix compile error with PPC_8xx=y") Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YySE6FHiOcbWWR+9@work --- arch/powerpc/xmon/xmon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f51c882bf9023..e34d7809f6c9f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1525,9 +1525,9 @@ bpt_cmds(void) cmd = inchar(); switch (cmd) { - static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; - int mode; - case 'd': /* bd - hardware data breakpoint */ + case 'd': { /* bd - hardware data breakpoint */ + static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; + int mode; if (xmon_is_ro) { printf(xmon_ro_msg); break; @@ -1560,6 +1560,7 @@ bpt_cmds(void) force_enable_xmon(); break; + } case 'i': /* bi - hardware instr breakpoint */ if (xmon_is_ro) { -- GitLab From 1892e87a3e9170146549779622cb844582f1e2bb Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 26 Sep 2022 23:03:25 -0700 Subject: [PATCH 362/694] powerpc/warp: switch to using gpiod API This switches PIKA Warp away from legacy gpio API and to newer gpiod API, so that we can eventually deprecate the former. Because LEDs are normally driven by leds-gpio driver, but the platform code also wants to access the LEDs during thermal shutdown, and gpiod API does not allow locating GPIO without requesting it, the platform code is now responsible for locating GPIOs through device tree and requesting them. It then constructs platform data for leds-gpio platform device and registers it. This allows platform code to retain access to LED GPIO descriptors and use them when needed. Signed-off-by: Dmitry Torokhov Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YzKSLcrYmV5kjyeX@google.com --- arch/powerpc/boot/dts/warp.dts | 4 +- arch/powerpc/platforms/44x/warp.c | 105 ++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts index b4f32740870e0..aa62d08e97c2f 100644 --- a/arch/powerpc/boot/dts/warp.dts +++ b/arch/powerpc/boot/dts/warp.dts @@ -258,14 +258,12 @@ }; power-leds { - compatible = "gpio-leds"; + compatible = "warp-power-leds"; green { gpios = <&GPIO1 0 0>; - default-state = "keep"; }; red { gpios = <&GPIO1 1 0>; - default-state = "keep"; }; }; diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index f03432ef010bd..cefa313c09f0b 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -5,15 +5,17 @@ * Copyright (c) 2008-2009 PIKA Technologies * Sean MacLennan */ +#include #include #include #include +#include #include #include #include #include #include -#include +#include #include #include @@ -92,8 +94,6 @@ static int __init warp_post_info(void) static LIST_HEAD(dtm_shutdown_list); static void __iomem *dtm_fpga; -static unsigned green_led, red_led; - struct dtm_shutdown { struct list_head list; @@ -101,7 +101,6 @@ struct dtm_shutdown { void *arg; }; - int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg) { struct dtm_shutdown *shutdown; @@ -132,6 +131,35 @@ int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg) return -EINVAL; } +#define WARP_GREEN_LED 0 +#define WARP_RED_LED 1 + +static struct gpio_led warp_gpio_led_pins[] = { + [WARP_GREEN_LED] = { + .name = "green", + .default_state = LEDS_DEFSTATE_KEEP, + .gpiod = NULL, /* to be filled by 
pika_setup_leds() */ + }, + [WARP_RED_LED] = { + .name = "red", + .default_state = LEDS_DEFSTATE_KEEP, + .gpiod = NULL, /* to be filled by pika_setup_leds() */ + }, +}; + +static struct gpio_led_platform_data warp_gpio_led_data = { + .leds = warp_gpio_led_pins, + .num_leds = ARRAY_SIZE(warp_gpio_led_pins), +}; + +static struct platform_device warp_gpio_leds = { + .name = "leds-gpio", + .id = -1, + .dev = { + .platform_data = &warp_gpio_led_data, + }, +}; + static irqreturn_t temp_isr(int irq, void *context) { struct dtm_shutdown *shutdown; @@ -139,7 +167,7 @@ static irqreturn_t temp_isr(int irq, void *context) local_irq_disable(); - gpio_set_value(green_led, 0); + gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0); /* Run through the shutdown list. */ list_for_each_entry(shutdown, &dtm_shutdown_list, list) @@ -153,7 +181,7 @@ static irqreturn_t temp_isr(int irq, void *context) out_be32(dtm_fpga + 0x14, reset); } - gpio_set_value(red_led, value); + gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value); value ^= 1; mdelay(500); } @@ -162,25 +190,78 @@ static irqreturn_t temp_isr(int irq, void *context) return IRQ_HANDLED; } +/* + * Because green and red power LEDs are normally driven by leds-gpio driver, + * but in case of critical temperature shutdown we want to drive them + * ourselves, we acquire both and then create leds-gpio platform device + * ourselves, instead of doing it through device tree. This way we can still + * keep access to the gpios and use them when needed. + */ static int pika_setup_leds(void) { struct device_node *np, *child; + struct gpio_desc *gpio; + struct gpio_led *led; + int led_count = 0; + int error; + int i; - np = of_find_compatible_node(NULL, NULL, "gpio-leds"); + np = of_find_compatible_node(NULL, NULL, "warp-power-leds"); if (!np) { printk(KERN_ERR __FILE__ ": Unable to find leds\n"); return -ENOENT; } - for_each_child_of_node(np, child) - if (of_node_name_eq(child, "green")) - green_led = of_get_gpio(child, 0); - else if (of_node_name_eq(child, "red")) - red_led = of_get_gpio(child, 0); + for_each_child_of_node(np, child) { + for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) { + led = &warp_gpio_led_pins[i]; + + if (!of_node_name_eq(child, led->name)) + continue; + + if (led->gpiod) { + printk(KERN_ERR __FILE__ ": %s led has already been defined\n", + led->name); + continue; + } + + gpio = fwnode_gpiod_get_index(of_fwnode_handle(child), + NULL, 0, GPIOD_ASIS, + led->name); + error = PTR_ERR_OR_ZERO(gpio); + if (error) { + printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n", + led->name, error); + of_node_put(child); + goto err_cleanup_pins; + } + + led->gpiod = gpio; + led_count++; + } + } of_node_put(np); + /* Skip device registration if no leds have been defined */ + if (led_count) { + error = platform_device_register(&warp_gpio_leds); + if (error) { + printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n", + error); + goto err_cleanup_pins; + } + } + return 0; + +err_cleanup_pins: + for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) { + led = &warp_gpio_led_pins[i]; + gpiod_put(led->gpiod); + led->gpiod = NULL; + } + return error; } static void pika_setup_critical_temp(struct device_node *np, -- GitLab From 4e87bd14e501030619d1bad29b3ec1f947f84fc4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 27 Sep 2022 12:23:58 -0700 Subject: [PATCH 363/694] powerpc/sgy_cts1000: convert to using gpiod API and facelift This patch converts the driver to newer gpiod API, and away from OF-specific legacy gpio API that we 
want to stop using. While at it, let's address a few more issues: - switch to using dev_info()/pr_info() and friends - cancel work when unbinding the driver Note that the original code handled halt GPIO polarity incorrectly: in halt callback, when line polarity is "low" it would set trigger to "1" and drive halt line high, which is counter to the annotation. gpiod API will drive such line low. However I do not see any DTSes in mainline that have a DT node with "sgy,gpio-halt" compatible. Signed-off-by: Dmitry Torokhov Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/YzNNznewTyCJiGFz@google.com --- arch/powerpc/platforms/85xx/sgy_cts1000.c | 132 +++++++++------------- 1 file changed, 53 insertions(+), 79 deletions(-) diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index e14d1b74d4e4c..751395cbf0222 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -7,10 +7,13 @@ * Copyright 2012 by Servergy, Inc. */ +#define pr_fmt(fmt) "gpio-halt: " fmt + +#include #include #include +#include #include -#include #include #include #include @@ -18,7 +21,8 @@ #include -static struct device_node *halt_node; +static struct gpio_desc *halt_gpio; +static int halt_irq; static const struct of_device_id child_match[] = { { @@ -36,23 +40,10 @@ static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn); static void __noreturn gpio_halt_cb(void) { - enum of_gpio_flags flags; - int trigger, gpio; - - if (!halt_node) - panic("No reset GPIO information was provided in DT\n"); - - gpio = of_get_gpio_flags(halt_node, 0, &flags); - - if (!gpio_is_valid(gpio)) - panic("Provided GPIO is invalid\n"); - - trigger = (flags == OF_GPIO_ACTIVE_LOW); - - printk(KERN_INFO "gpio-halt: triggering GPIO.\n"); + pr_info("triggering GPIO.\n"); /* Probably wont return */ - gpio_set_value(gpio, trigger); + gpiod_set_value(halt_gpio, 1); panic("Halt failed\n"); } @@ -61,95 +52,78 @@ static void __noreturn gpio_halt_cb(void) * to handle the shutdown/poweroff. */ static irqreturn_t gpio_halt_irq(int irq, void *__data) { - printk(KERN_INFO "gpio-halt: shutdown due to power button IRQ.\n"); + struct platform_device *pdev = __data; + + dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n"); schedule_work(&gpio_halt_wq); return IRQ_HANDLED; }; -static int gpio_halt_probe(struct platform_device *pdev) +static int __gpio_halt_probe(struct platform_device *pdev, + struct device_node *halt_node) { - enum of_gpio_flags flags; - struct device_node *node = pdev->dev.of_node; - struct device_node *child_node; - int gpio, err, irq; - int trigger; - - if (!node) - return -ENODEV; - - /* If there's no matching child, this isn't really an error */ - child_node = of_find_matching_node(node, child_match); - if (!child_node) - return 0; - - /* Technically we could just read the first one, but punish - * DT writers for invalid form. */ - if (of_gpio_count(child_node) != 1) { - err = -EINVAL; - goto err_put; - } - - /* Get the gpio number relative to the dynamic base. 
*/ - gpio = of_get_gpio_flags(child_node, 0, &flags); - if (!gpio_is_valid(gpio)) { - err = -EINVAL; - goto err_put; - } + int err; - err = gpio_request(gpio, "gpio-halt"); + halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node), + NULL, 0, GPIOD_OUT_LOW, "gpio-halt"); + err = PTR_ERR_OR_ZERO(halt_gpio); if (err) { - printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n", - gpio); - goto err_put; + dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err); + return err; } - trigger = (flags == OF_GPIO_ACTIVE_LOW); - - gpio_direction_output(gpio, !trigger); - /* Now get the IRQ which tells us when the power button is hit */ - irq = irq_of_parse_and_map(child_node, 0); - err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING, "gpio-halt", child_node); + halt_irq = irq_of_parse_and_map(halt_node, 0); + err = request_irq(halt_irq, gpio_halt_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "gpio-halt", pdev); if (err) { - printk(KERN_ERR "gpio-halt: error requesting IRQ %d for " - "GPIO %d.\n", irq, gpio); - gpio_free(gpio); - goto err_put; + dev_err(&pdev->dev, "failed to request IRQ %d: %d\n", + halt_irq, err); + gpiod_put(halt_gpio); + halt_gpio = NULL; + return err; } /* Register our halt function */ ppc_md.halt = gpio_halt_cb; pm_power_off = gpio_halt_cb; - printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d" - " irq).\n", gpio, trigger, irq); + dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq); - halt_node = child_node; return 0; - -err_put: - of_node_put(child_node); - return err; } -static int gpio_halt_remove(struct platform_device *pdev) +static int gpio_halt_probe(struct platform_device *pdev) { - if (halt_node) { - int gpio = of_get_gpio(halt_node, 0); - int irq = irq_of_parse_and_map(halt_node, 0); + struct device_node *halt_node; + int ret; + + if (!pdev->dev.of_node) + return -ENODEV; + + /* If there's no matching child, this isn't really an error */ + halt_node = of_find_matching_node(pdev->dev.of_node, child_match); + if (!halt_node) + return -ENODEV; + + ret = __gpio_halt_probe(pdev, halt_node); + of_node_put(halt_node); - free_irq(irq, halt_node); + return ret; +} - ppc_md.halt = NULL; - pm_power_off = NULL; +static int gpio_halt_remove(struct platform_device *pdev) +{ + free_irq(halt_irq, pdev); + cancel_work_sync(&gpio_halt_wq); - gpio_free(gpio); + ppc_md.halt = NULL; + pm_power_off = NULL; - of_node_put(halt_node); - halt_node = NULL; - } + gpiod_put(halt_gpio); + halt_gpio = NULL; return 0; } -- GitLab From f2c45962cc618c12f69fd46e6ebc20b9cd7f15ac Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 28 Sep 2022 08:29:00 +0200 Subject: [PATCH 364/694] powerpc/8xx: Simplify pte_update() with 16k pages While looking at code generated for code patching, I saw that pte_clear generated: 2d8: 38 a0 00 00 li r5,0 2dc: 38 e0 10 00 li r7,4096 2e0: 39 00 20 00 li r8,8192 2e4: 39 40 30 00 li r10,12288 2e8: 90 a9 00 00 stw r5,0(r9) 2ec: 90 e9 00 04 stw r7,4(r9) 2f0: 91 09 00 08 stw r8,8(r9) 2f4: 91 49 00 0c stw r10,12(r9) With 16k pages, only the first entry is used by the kernel, so no need to adapt the address of other entries. Only duplicate the first entry for hardware. 
Now it is: 2cc: 39 40 00 00 li r10,0 2d0: 91 49 00 00 stw r10,0(r9) 2d4: 91 49 00 04 stw r10,4(r9) 2d8: 91 49 00 08 stw r10,8(r9) 2dc: 91 49 00 0c stw r10,12(r9) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/65f76300de07091a59a042a3db2d0ce9b939a05c.1664346532.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 0d40b33184ebe..0e861e59b7695 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -256,8 +256,14 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p num = number_of_cells_per_pte(pmd, new, huge); - for (i = 0; i < num; i++, entry++, new += SZ_4K) - *entry = new; + for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) { + *entry++ = new; + if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) { + *entry++ = new; + *entry++ = new; + *entry++ = new; + } + } return old; } -- GitLab From 0b4721815c5328e08c3acdee4a53890e012d830b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 28 Sep 2022 08:29:22 +0200 Subject: [PATCH 365/694] powerpc/8xx: Reverse order entries are written by __set_pte_at() At the time being, with 16k pages __set_pte_at() writes table entries in reverse order: 294: 91 49 00 0c stw r10,12(r9) 298: 91 49 00 08 stw r10,8(r9) 29c: 91 49 00 04 stw r10,4(r9) 2a0: 91 49 00 00 stw r10,0(r9) Allthough there should be no impact at all as it stays in a single cacheline, reverse the writing in a more natural order. 288: 91 49 00 0c stw r10,0(r9) 28c: 91 49 00 08 stw r10,4(r9) 290: 91 49 00 04 stw r10,8(r9) 294: 91 49 00 00 stw r10,12(r9) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/67c3b5d44edfec054234ea9b4d05fc4b4f7f8a0e.1664346554.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index d9067dfc531cc..69c3a050a3d85 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -183,7 +183,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, * cases, and 32-bit non-hash with 32-bit PTEs. */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) - ptep->pte = ptep->pte1 = ptep->pte2 = ptep->pte3 = pte_val(pte); + ptep->pte3 = ptep->pte2 = ptep->pte1 = ptep->pte = pte_val(pte); #else *ptep = pte; #endif -- GitLab From 5825603f67bc5ff445a1847302884154f0afa627 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 30 Sep 2022 16:20:12 +0930 Subject: [PATCH 366/694] powerpc/microwatt: Add litesd This is the register layout of the litesd peripheral for the fusesoc based Microwatt SoC. It requires a description of the system clock, which is hardcoded to 100MHz. 
Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220930065012.2860577-1-joel@jms.id.au --- arch/powerpc/boot/dts/microwatt.dts | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index b69db1d275cd6..269e930b3b0b1 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -21,6 +21,14 @@ reg = <0x00000000 0x00000000 0x00000000 0x10000000>; }; + clocks { + sys_clk: litex_sys_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <100000000>; + }; + }; + cpus { #size-cells = <0x00>; #address-cells = <0x01>; @@ -141,6 +149,20 @@ litex,slot-size = <0x800>; interrupts = <0x11 0x1>; }; + + mmc@8040000 { + compatible = "litex,mmc"; + reg = <0x8042800 0x800 + 0x8041000 0x800 + 0x8040800 0x800 + 0x8042000 0x800 + 0x8041800 0x800>; + reg-names = "phy", "core", "reader", "writer", "irq"; + bus-width = <4>; + interrupts = <0x13 1>; + cap-sd-highspeed; + clocks = <&sys_clk>; + }; }; chosen { -- GitLab From 3e65412709293d5fb65249408e8e801b23b72635 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 6 Oct 2022 14:20:18 +1100 Subject: [PATCH 367/694] powerpc: Make instruction dump work with scripts/decodecode Matt reported that scripts/decodecode doesn't work for the instruction dump in the powerpc oops output. Although there are scripts around that can decode it, it would be preferable if the standard in-tree script worked. All other arches prefix the instruction dump with "Code:", and that's what the script looks for, so use that. The script then works as expected: $ CROSS_COMPILE=powerpc64le-linux-gnu- ./scripts/decodecode Code: fbc1fff0 f821ffc1 7c7d1b78 7c9c2378 ebc30028 7fdff378 48000018 60000000 60000000 ebff0008 7c3ef840 41820048 <815f0060> e93f0000 5529077c 7d295378 ^D All code ======== 0: f0 ff c1 fb std r30,-16(r1) 4: c1 ff 21 f8 stdu r1,-64(r1) 8: 78 1b 7d 7c mr r29,r3 ... Note that the script doesn't cope well with printk timestamps or printk caller info. Reported-by: Matthew Wilcox Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221006032019.1128624-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 67da147fe34dc..3372b5c21168e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1359,7 +1359,7 @@ static void show_instructions(struct pt_regs *regs) unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); - printk("Instruction dump:"); + printk("Code:"); /* * If we were executing with the MMU off for instructions, adjust pc -- GitLab From d90bb7b4fdaff3f2fa68c7af85de2ce9e70189b1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 6 Oct 2022 14:20:19 +1100 Subject: [PATCH 368/694] powerpc: Print instruction dump on a single line Although the previous commit made the powerpc instruction dump usable with scripts/decodecode, there are still some problems. Because the dump is split across multiple lines, the script doesn't cope with printk timestamps or caller info. 
That can be fixed by printing the entire dump on one line, eg: [ 12.016307][ T112] --- interrupt: c00 [ 12.016605][ T112] Code: 4b7aae15 60000000 3d22016e 3c62ffec 39291160 38639bc0 e8890000 4b7aadf9 60000000 4bfffee8 7c0802a6 60000000 <0fe00000> 60420000 3c4c008f 384268a0 [ 12.017655][ T112] ---[ end trace 0000000000000000 ]--- That output can then be piped directly into scripts/decodecode and interpreted correctly. Printing the dump on a single line does produce a very long line, about 173 characters. That is still shorter than x86, which prints nearly 200 characters even without timestamps etc. All consoles I'm aware of will wrap the line if it's too long, so the length should not be a functional problem. If anything it should help on consoles like VGA by using less vertical space. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221006032019.1128624-2-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3372b5c21168e..e3e1feaa536a9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1359,7 +1359,7 @@ static void show_instructions(struct pt_regs *regs) unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); - printk("Code:"); + printk("Code: "); /* * If we were executing with the MMU off for instructions, adjust pc @@ -1373,9 +1373,6 @@ static void show_instructions(struct pt_regs *regs) for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; - if (!(i % 8)) - pr_cont("\n"); - if (!__kernel_text_address(pc) || get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); -- GitLab From f985adaf2ff934ec869b32ca1f7f97e2825e3a49 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 6 Oct 2022 20:56:53 +1000 Subject: [PATCH 369/694] powerpc: remove the last remnants of cputime_t cputime_t was a core kernel type, removed by commits ed5c8c854f2b..b672592f0221. As explained in commit b672592f0221 ("sched/cputime: Remove generic asm headers"), the final cleanup is for the arch to provide cputime_to_nsec[s](). Commit ade7667a981b ("powerpc: Add cputime_to_nsecs()") did that, but justdidn't remove the then-unused cputime_to_usecs(), cputime_t type, and associated remnants. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221006105653.115829-1-npiggin@gmail.com --- arch/powerpc/include/asm/cputime.h | 17 +---------------- arch/powerpc/kernel/time.c | 23 ++--------------------- 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 431ae23430222..4961fb38e4385 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -21,23 +21,8 @@ #include #include -typedef u64 __nocast cputime_t; -typedef u64 __nocast cputime64_t; - -#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new) - #ifdef __KERNEL__ -/* - * Convert cputime <-> microseconds - */ -extern u64 __cputime_usec_factor; - -static inline unsigned long cputime_to_usecs(const cputime_t ct) -{ - return mulhdu((__force u64) ct, __cputime_usec_factor); -} - -#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime) +#define cputime_to_nsecs(cputime) tb_to_ns(cputime) /* * PPC64 uses PACA which is task independent for storing accounting data while diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a2ab397065c66..d68de3618741e 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -130,7 +130,7 @@ unsigned long tb_ticks_per_jiffy; unsigned long tb_ticks_per_usec = 100; /* sane default */ EXPORT_SYMBOL(tb_ticks_per_usec); unsigned long tb_ticks_per_sec; -EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ +EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime conversions */ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); @@ -150,21 +150,6 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); bool tb_invalid; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -/* - * Factor for converting from cputime_t (timebase ticks) to - * microseconds. This is stored as 0.64 fixed-point binary fraction. - */ -u64 __cputime_usec_factor; -EXPORT_SYMBOL(__cputime_usec_factor); - -static void calc_cputime_factors(void) -{ - struct div_result res; - - div128_by_32(1000000, 0, tb_ticks_per_sec, &res); - __cputime_usec_factor = res.result_low; -} - /* * Read the SPURR on systems that have it, otherwise the PURR, * or if that doesn't exist return the timebase value passed in. @@ -369,10 +354,7 @@ void vtime_flush(struct task_struct *tsk) acct->hardirq_time = 0; acct->softirq_time = 0; } - -#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#define calc_cputime_factors() -#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ void __delay(unsigned long loops) { @@ -914,7 +896,6 @@ void __init time_init(void) tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; - calc_cputime_factors(); /* * Compute scale factor for sched_clock. -- GitLab From 2cb1dfac6f792f9e4a092793215f0d26e9f8d5b2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 Oct 2022 12:49:50 +0200 Subject: [PATCH 370/694] powerpc/sysdev: Remove some duplicate prefix in some messages At the beginning of the file, we have: #define pr_fmt(fmt) "xive: " fmt So, there is no need to duplicate "XIVE:" in debug and error messages. 
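With the pr_fmt() defined at the top of the file, every pr_err()/pr_debug() there already carries the prefix. The mechanism, as a stand-alone sketch rather than code from this file:

  /* must be defined before any include that pulls in printk.h */
  #define pr_fmt(fmt) "xive: " fmt

  #include <linux/printk.h>

  static void __maybe_unused example(void)
  {
          /* printed as: "xive: Failed to allocate pool VP, KVM might not function" */
          pr_err("Failed to allocate pool VP, KVM might not function\n");
  }
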
For the records, these useless prefix have been added in commit 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Signed-off-by: Christophe JAILLET Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7b8b5915a2c7c1616b33e8433ebe0a0bf07070a2.1665312579.git.christophe.jaillet@wanadoo.fr --- arch/powerpc/sysdev/xive/native.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 3925825954bcc..19d880ebc5e61 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -535,13 +535,13 @@ static bool __init xive_parse_provisioning(struct device_node *np) static void __init xive_native_setup_pools(void) { /* Allocate a pool big enough */ - pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids); + pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids); xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids); if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP)) - pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n"); + pr_err("Failed to allocate pool VP, KVM might not function\n"); - pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n", + pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n", xive_pool_vps, nr_cpu_ids); } -- GitLab From 579aee9fc594af94c242068c011b0233563d4bbf Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 10 Oct 2022 16:57:21 +1100 Subject: [PATCH 371/694] powerpc: suppress some linker warnings in recent linker versions This is a follow on from commit 0d362be5b142 ("Makefile: link with -z noexecstack --no-warn-rwx-segments") for arch/powerpc/boot to address wanrings like: ld: warning: opal-calls.o: missing .note.GNU-stack section implies executable stack ld: NOTE: This behaviour is deprecated and will be removed in a future version of the linker ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions This fixes issue https://github.com/linuxppc/issues/issues/417 Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221010165721.106267e6@canb.auug.org.au --- arch/powerpc/boot/wrapper | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 5bdd4dd20bbb1..a8a87d7667f4e 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -215,6 +215,11 @@ ld_version() }' } +ld_is_lld() +{ + ${CROSS}ld -V 2>&1 | grep -q LLD +} + # Do not include PT_INTERP segment when linking pie. Non-pie linking # just ignores this option. LD_VERSION=$(${CROSS}ld --version | ld_version) @@ -223,6 +228,14 @@ if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then nodl="--no-dynamic-linker" fi +# suppress some warnings in recent ld versions +nowarn="-z noexecstack" +if ! 
ld_is_lld; then + if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then + nowarn="$nowarn --no-warn-rwx-segments" + fi +fi + platformo=$object/"$platform".o lds=$object/zImage.lds ext=strip @@ -504,7 +517,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $nowarn $rodynamic $notext -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi -- GitLab From 8b49670f3bb3f10cd4d5a6dca17f5a31b173ecdc Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 17 Oct 2022 11:23:33 +0800 Subject: [PATCH 372/694] powerpc/xive: add missing iounmap() in error path in xive_spapr_populate_irq_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If remapping 'data->trig_page' fails, the 'data->eoi_mmio' need be unmapped before returning from xive_spapr_populate_irq_data(). Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Signed-off-by: Yang Yingliang Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221017032333.1852406-1-yangyingliang@huawei.com --- arch/powerpc/sysdev/xive/spapr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index e2c8f93b535ba..e454192643910 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -439,6 +439,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift); if (!data->trig_mmio) { + iounmap(data->eoi_mmio); pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq); return -ENOMEM; } -- GitLab From 16a3f41ff3322830683d3ccc14d77736829c61bf Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Wed, 19 Oct 2022 14:34:14 +0800 Subject: [PATCH 373/694] powerpc/mpic_msgr: fix cast removes address space of expression warnings When build Linux kernel, encounter the following warnings: ./arch/powerpc/sysdev/mpic_msgr.c:230:38: warning: cast removes address space '__iomem' of expression ./arch/powerpc/sysdev/mpic_msgr.c:230:27: warning: incorrect type in assignment (different address spaces) The data type of msgr->mer and msgr->base are 'u32 __iomem *', but converted to 'u32 *' and 'u8 *' directly and cause above warnings, now instead of using a type cast, change the size of the pointer offset to fix these warnings. 
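The difference sparse complains about, in reduced form (the types and offset match the driver, the function name is made up):

  #include <linux/io.h>
  #include <linux/types.h>

  #define MPIC_MSGR_MER_OFFSET    (0x100 / sizeof(u32))

  struct msgr_example {
          u32 __iomem *base;
          u32 __iomem *mer;
  };

  static void msgr_example_setup(struct msgr_example *msgr)
  {
          /*
           * (u32 *)((u8 *)msgr->base + 0x100) strips the __iomem address
           * space and triggers the warnings above; plain pointer arithmetic
           * on the __iomem pointer keeps the annotation, with the offset
           * expressed in u32 cells (0x100 bytes == 0x40 cells).
           */
          msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET;
  }
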
Signed-off-by: ruanjinjie Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221019063414.3758087-1-ruanjinjie@huawei.com --- arch/powerpc/sysdev/mpic_msgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index a439e33eae061..d75064fb7d12f 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -20,7 +20,7 @@ #define MPIC_MSGR_REGISTERS_PER_BLOCK 4 #define MPIC_MSGR_STRIDE 0x10 -#define MPIC_MSGR_MER_OFFSET 0x100 +#define MPIC_MSGR_MER_OFFSET (0x100 / sizeof(u32)) #define MSGR_INUSE 0 #define MSGR_FREE 1 @@ -234,7 +234,7 @@ static int mpic_msgr_probe(struct platform_device *dev) reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i; msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE; - msgr->mer = (u32 *)((u8 *)msgr->base + MPIC_MSGR_MER_OFFSET); + msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET; msgr->in_use = MSGR_FREE; msgr->num = i; raw_spin_lock_init(&msgr->lock); -- GitLab From 2fa9482334b0593b7edc371a13c0cca81daaa89e Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 20 Oct 2022 22:58:57 +0530 Subject: [PATCH 374/694] powerpc/kprobes: Remove preempt disable around call to get_kprobe() in arch_prepare_kprobe() arch_prepare_kprobe() is called from register_kprobe() via prepare_kprobe(), or through register_aggr_kprobe(), both with the kprobe_mutex held. Per the comment for get_kprobe(): /* * This routine is called either: * - under the 'kprobe_mutex' - during kprobe_[un]register(). * OR * - with preemption disabled - from architecture specific code. */ As such, there is no need to disable preemption around the call to get_kprobe(). Drop the same. Reported-by: Nicholas Piggin Signed-off-by: Naveen N. Rao Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1043d06a0affed83a4a46dd29466e72820ee215d.1666262278.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/kprobes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index bd7b1a0354594..88f42de681e1f 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -158,9 +158,7 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } - preempt_disable(); prev = get_kprobe(p->addr - 1); - preempt_enable_no_resched(); /* * When prev is a ftrace-based kprobe, we don't have an insn, and it -- GitLab From 04ec5d5782fb346c291a05a2efe59483d8ada4c4 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 20 Oct 2022 22:58:58 +0530 Subject: [PATCH 375/694] powerpc/kprobes: Have optimized_callback() use preempt_enable() Similar to x86 commit 2e62024c265aa6 ("kprobes/x86: Use preempt_enable() in optimized_callback()"), change powerpc optprobes to use preempt_enable() rather than preempt_enable_no_resched() since powerpc also removed irq disabling for optprobes in commit f72180cc93a2c6 ("powerpc/kprobes: Do not disable interrupts for optprobes and kprobes_on_ftrace"). Reported-by: Nicholas Piggin Signed-off-by: Naveen N. 
Rao Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1885bab182626c33d9bf6421f430abf924c521a5.1666262278.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/optprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 3b1c2236cbee5..004fae2044a3e 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -112,7 +112,7 @@ static void optimized_callback(struct optimized_kprobe *op, __this_cpu_write(current_kprobe, NULL); } - preempt_enable_no_resched(); + preempt_enable(); } NOKPROBE_SYMBOL(optimized_callback); -- GitLab From 266b1991a433cd55bb86a933216b3f6762737d47 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 20 Oct 2022 22:58:59 +0530 Subject: [PATCH 376/694] powerpc/kprobes: Use preempt_enable() rather than the no_resched variant preempt_enable_no_resched() is just the same as preempt_enable() when we are in a irqs disabled context. kprobe_handler() and the post/fault handlers are all called with irqs disabled. As such, convert those to just use preempt_enable(). Reported-by: Nicholas Piggin Signed-off-by: Naveen N. Rao Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/72639f75fe66f931ec8c2165276ffbfb0fe1006f.1666262278.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/kernel/kprobes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 88f42de681e1f..86ca5a61ea9af 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -369,7 +369,7 @@ int kprobe_handler(struct pt_regs *regs) if (ret > 0) { restore_previous_kprobe(kcb); - preempt_enable_no_resched(); + preempt_enable(); return 1; } } @@ -382,7 +382,7 @@ int kprobe_handler(struct pt_regs *regs) if (p->pre_handler && p->pre_handler(p, regs)) { /* handler changed execution path, so skip ss setup */ reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } @@ -395,7 +395,7 @@ int kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_HIT_SSDONE; reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } } @@ -404,7 +404,7 @@ int kprobe_handler(struct pt_regs *regs) return 1; no_kprobe: - preempt_enable_no_resched(); + preempt_enable(); return ret; } NOKPROBE_SYMBOL(kprobe_handler); @@ -490,7 +490,7 @@ int kprobe_post_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, msr @@ -529,7 +529,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: -- GitLab From 04757c5e21ea17615b66f45e38f1cab32a7a0654 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 21 Oct 2022 09:45:45 +0100 Subject: [PATCH 377/694] selftests/powerpc: Fix spelling mistake "mmaping" -> "mmapping" There is a spelling mistake in a perror message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221021084545.65973-1-colin.i.king@gmail.com --- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index bbc05ffc5860a..1a70a96f0bfe7 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -329,7 +329,7 @@ static int parent(struct shared_info *info, pid_t pid) core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0); if (core == (void *) -1) { - perror("Error mmaping core file"); + perror("Error mmapping core file"); ret = TEST_FAIL; goto out; } -- GitLab From ad8284ead833379fc57d90e50dbae1352b116c2b Mon Sep 17 00:00:00 2001 From: Shaomin Deng Date: Sat, 29 Oct 2022 05:46:43 -0400 Subject: [PATCH 378/694] selftests/powerpc: Remove repeated word in comments Remove the repeated word "not" in comments. Signed-off-by: Shaomin Deng Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221029094643.5595-1-dengshaomin@cdjrlc.com --- tools/testing/selftests/powerpc/include/pkeys.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 3312cb1b058d7..51729d9a71110 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -24,7 +24,7 @@ #undef PKEY_DISABLE_EXECUTE #define PKEY_DISABLE_EXECUTE 0x4 -/* Older versions of libc do not not define this */ +/* Older versions of libc do not define this */ #ifndef SEGV_PKUERR #define SEGV_PKUERR 4 #endif -- GitLab From f668027521561d1071ccf54500c82a58a1918b2b Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Mon, 24 Oct 2022 15:13:46 +1100 Subject: [PATCH 379/694] powerpc/8xx: Fix warning in hw_breakpoint_handler() In hw_breakpoint_handler(), ea is set by wp_get_instr_detail() except for 8xx, leading the variable to be passed uninitialised to wp_check_constraints(). This is safe as wp_check_constraints() returns early without using ea, so just set it to make the compiler happy. 
Signed-off-by: Russell Currey Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221024041346.103608-1-ruscur@russell.cc --- arch/powerpc/kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 8db1a15d7acbe..e1b4e70c8fd0f 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -646,7 +646,7 @@ int hw_breakpoint_handler(struct die_args *args) ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; - unsigned long ea; + unsigned long ea = 0; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); -- GitLab From afa1cda4097077e37639ca7098c2147e1885b2df Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Mon, 31 Oct 2022 02:37:06 -0400 Subject: [PATCH 380/694] powerpc/pseries/eeh: Fix some kernel-doc warnings Fixes the following W=1 kernel build warning(s): arch/powerpc/platforms/pseries/eeh_pseries.c:163: warning: Function parameter or member 'config_addr' not described in 'pseries_eeh_phb_reset' arch/powerpc/platforms/pseries/eeh_pseries.c:163: warning: Excess function parameter 'config_adddr' description in 'pseries_eeh_phb_reset' arch/powerpc/platforms/pseries/eeh_pseries.c:198: warning: Function parameter or member 'config_addr' not described in 'pseries_eeh_phb_configure_bridge' arch/powerpc/platforms/pseries/eeh_pseries.c:198: warning: Excess function parameter 'config_adddr' description in 'pseries_eeh_phb_configure_bridge' Signed-off-by: Bo Liu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221031063706.2770-1-liubo03@inspur.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 8e40ccac0f44e..ea890037843c4 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -154,7 +154,7 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) /** * pseries_eeh_phb_reset - Reset the specified PHB * @phb: PCI controller - * @config_adddr: the associated config address + * @config_addr: the associated config address * @option: reset option * * Reset the specified PHB/PE @@ -188,7 +188,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in /** * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE * @phb: PCI controller - * @config_adddr: the associated config address + * @config_addr: the associated config address * * The function will be called to reconfigure the bridges included * in the specified PE so that the mulfunctional PE would be recovered -- GitLab From 59dc2d94bc12dac53a5d2368ad97ca24e7cc5682 Mon Sep 17 00:00:00 2001 From: Chen Lifu Date: Thu, 3 Nov 2022 15:01:22 +0800 Subject: [PATCH 381/694] powerpc/powermac: Fix symbol not declared warnings 1. ppc_override_l2cr and ppc_override_l2cr_value are only used in l2cr_init() function, remove them and used *l2cr directly. 2. has_l2cache is not used outside of the file, so mark it static and do not initialise statics to 0. Fixes the following warnings: arch/powerpc/platforms/powermac/setup.c:73:5: warning: symbol 'ppc_override_l2cr' was not declared. Should it be static? arch/powerpc/platforms/powermac/setup.c:74:5: warning: symbol 'ppc_override_l2cr_value' was not declared. Should it be static? 
arch/powerpc/platforms/powermac/setup.c:75:5: warning: symbol 'has_l2cache' was not declared. Should it be static? Signed-off-by: Chen Lifu Reviewed-by: Christophe Leroy [mpe: Unwrap printk string] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221103070122.340773-1-chenlifu@huawei.com --- arch/powerpc/platforms/powermac/setup.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 04daa7f0a03c6..4f7ee885a78ff 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -70,9 +70,7 @@ #undef SHOW_GATWICK_IRQS -int ppc_override_l2cr = 0; -int ppc_override_l2cr_value; -int has_l2cache = 0; +static int has_l2cache; int pmac_newworld; @@ -236,22 +234,16 @@ static void __init l2cr_init(void) const unsigned int *l2cr = of_get_property(np, "l2cr-value", NULL); if (l2cr) { - ppc_override_l2cr = 1; - ppc_override_l2cr_value = *l2cr; _set_L2CR(0); - _set_L2CR(ppc_override_l2cr_value); + _set_L2CR(*l2cr); + pr_info("L2CR overridden (0x%x), backside cache is %s\n", + *l2cr, ((*l2cr) & 0x80000000) ? + "enabled" : "disabled"); } of_node_put(np); break; } } - - if (ppc_override_l2cr) - printk(KERN_INFO "L2CR overridden (0x%x), " - "backside cache is %s\n", - ppc_override_l2cr_value, - (ppc_override_l2cr_value & 0x80000000) - ? "enabled" : "disabled"); } #endif -- GitLab From 2330757e0be0acad88852e211dcd6106390a729b Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:34 -0500 Subject: [PATCH 382/694] powerpc/pseries: fix the object owners enum value in plpks driver OS_VAR_LINUX enum in PLPKS driver should be 0x02 instead of 0x01. Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-2-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h index c6a291367bb13..275ccd86bfb5e 100644 --- a/arch/powerpc/platforms/pseries/plpks.h +++ b/arch/powerpc/platforms/pseries/plpks.h @@ -17,7 +17,7 @@ #define WORLDREADABLE 0x08000000 #define SIGNEDUPDATE 0x01000000 -#define PLPKS_VAR_LINUX 0x01 +#define PLPKS_VAR_LINUX 0x02 #define PLPKS_VAR_COMMON 0x04 struct plpks_var { -- GitLab From af223e1728c448073d1e12fe464bf344310edeba Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:35 -0500 Subject: [PATCH 383/694] powerpc/pseries: Fix the H_CALL error code in PLPKS driver PAPR Spec defines H_P1 actually as H_PARAMETER and maps H_ABORTED to a different numerical value. Fix the error codes as per PAPR Specification. 
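As an aside, here is a hedged sketch of where pseries_status_to_err() ends up once this patch and the follow-up EIO change below are applied. It is a reconstruction from the hunks that follow, not a verbatim copy: the real function has more cases than shown, and the -EINVAL grouping for the H_Pn codes is assumed from context.

/* Sketch of the PLPKS return-code conversion after the fixes. */
static int pseries_status_to_err(int rc)
{
	switch (rc) {
	case H_SUCCESS:
		return 0;
	case H_FUNCTION:
		return -ENXIO;
	case H_PARAMETER:	/* the driver previously matched on the misnamed H_P1 */
	case H_P2:
	case H_P3:
	case H_P4:
		return -EINVAL;	/* grouping assumed */
	case H_ABORTED:		/* -54, as PAPR defines it */
		return -EIO;	/* was -EINTR; changed by the next patch */
	default:
		return -EINVAL;
	}
}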
Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-3-nayna@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 3 +-- arch/powerpc/platforms/pseries/plpks.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 8abae463f6c12..95fd7f9485d55 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -79,7 +79,7 @@ #define H_NOT_ENOUGH_RESOURCES -44 #define H_R_STATE -45 #define H_RESCINDED -46 -#define H_P1 -54 +#define H_ABORTED -54 #define H_P2 -55 #define H_P3 -56 #define H_P4 -57 @@ -100,7 +100,6 @@ #define H_COP_HW -74 #define H_STATE -75 #define H_IN_USE -77 -#define H_ABORTED -78 #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index f4b5b5a64db3d..32ce4d780d8f5 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -75,7 +75,7 @@ static int pseries_status_to_err(int rc) case H_FUNCTION: err = -ENXIO; break; - case H_P1: + case H_PARAMETER: case H_P2: case H_P3: case H_P4: -- GitLab From bb8e4c7cb759b90a04f2e94056b50288ff46a0ed Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:36 -0500 Subject: [PATCH 384/694] powerpc/pseries: Return -EIO instead of -EINTR for H_ABORTED error Some commands for eg. "cat" might continue to retry on encountering EINTR. This is not expected for original error code H_ABORTED. Map H_ABORTED to more relevant Linux error code EIO. Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-4-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 32ce4d780d8f5..cbea447122ca0 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -111,7 +111,7 @@ static int pseries_status_to_err(int rc) err = -EEXIST; break; case H_ABORTED: - err = -EINTR; + err = -EIO; break; default: err = -EINVAL; -- GitLab From 8888ea772972323362660e9a1339175294664a6c Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:37 -0500 Subject: [PATCH 385/694] powerpc/pseries: cleanup error logs in plpks driver Logging H_CALL return codes in PLPKS driver are easy to confuse with Linux error codes. Let the caller of the function log the converted linux error code. 
Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-5-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index cbea447122ca0..72d9debf18c0b 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -312,10 +312,6 @@ int plpks_write_var(struct plpks_var var) if (!rc) rc = plpks_confirm_object_flushed(label, auth); - if (rc) - pr_err("Failed to write variable %s for component %s with error %d\n", - var.name, var.component, rc); - rc = pseries_status_to_err(rc); kfree(label); out: @@ -350,10 +346,6 @@ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) if (!rc) rc = plpks_confirm_object_flushed(label, auth); - if (rc) - pr_err("Failed to remove variable %s for component %s with error %d\n", - vname.name, component, rc); - rc = pseries_status_to_err(rc); kfree(label); out: @@ -395,8 +387,6 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) maxobjsize); if (rc != H_SUCCESS) { - pr_err("Failed to read variable %s for component %s with error %d\n", - var->name, var->component, rc); rc = pseries_status_to_err(rc); goto out_free_output; } -- GitLab From 212dd5cfbee7815f3c665a51c501701edb881599 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:38 -0500 Subject: [PATCH 386/694] powerpc/pseries: replace kmalloc with kzalloc in PLPKS driver Replace kmalloc with kzalloc in construct_auth() function to default initialize structure with zeroes. Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-6-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 72d9debf18c0b..e8c02735b7021 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -162,19 +162,15 @@ static struct plpks_auth *construct_auth(u8 consumer) if (consumer > PKS_OS_OWNER) return ERR_PTR(-EINVAL); - auth = kmalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); + auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); if (!auth) return ERR_PTR(-ENOMEM); auth->version = 1; auth->consumer = consumer; - auth->rsvd0 = 0; - auth->rsvd1 = 0; - if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) { - auth->passwordlength = 0; + if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) return auth; - } memcpy(auth->password, ospassword, ospasswordlength); -- GitLab From 1f622f3f80cbf8999ff5955a2fcfbd801a1f32e0 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 6 Nov 2022 15:58:39 -0500 Subject: [PATCH 387/694] powerpc/pseries: fix plpks_read_var() code for different consumers Even though plpks_read_var() is currently called to read variables owned by different consumers, it internally supports only OS consumer. Fix plpks_read_var() to handle different consumers correctly. 
Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221106205839.600442-7-nayna@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 28 +++++++++++++++++--------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index e8c02735b7021..4edd1585e2457 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -354,22 +354,24 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; struct plpks_auth *auth; - struct label *label; + struct label *label = NULL; u8 *output; int rc; if (var->namelen > MAX_NAME_SIZE) return -EINVAL; - auth = construct_auth(PKS_OS_OWNER); + auth = construct_auth(consumer); if (IS_ERR(auth)) return PTR_ERR(auth); - label = construct_label(var->component, var->os, var->name, - var->namelen); - if (IS_ERR(label)) { - rc = PTR_ERR(label); - goto out_free_auth; + if (consumer == PKS_OS_OWNER) { + label = construct_label(var->component, var->os, var->name, + var->namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out_free_auth; + } } output = kzalloc(maxobjsize, GFP_KERNEL); @@ -378,9 +380,15 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) goto out_free_label; } - rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), - virt_to_phys(label), label->size, virt_to_phys(output), - maxobjsize); + if (consumer == PKS_OS_OWNER) + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size, virt_to_phys(output), + maxobjsize); + else + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(var->name), var->namelen, virt_to_phys(output), + maxobjsize); + if (rc != H_SUCCESS) { rc = pseries_status_to_err(rc); -- GitLab From a9ffb8ee7b65a468474d6a2be7e9cca4b8f8ea5f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 18 Nov 2022 17:40:29 +0800 Subject: [PATCH 388/694] powerpc: Use "grep -E" instead of "egrep" The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this up by moving the related file to use "grep -E" instead. sed -i "s/egrep/grep -E/g" `grep egrep -rwl arch/powerpc` Here are the steps to install the latest grep: wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz tar xf grep-3.8.tar.gz cd grep-3.8 && ./configure && make sudo make install export PATH=/usr/local/bin:$PATH Signed-off-by: Tiezhu Yang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1668764429-11540-1-git-send-email-yangtiezhu@loongson.cn --- arch/powerpc/boot/wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index a8a87d7667f4e..af04cea82b941 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -594,7 +594,7 @@ ps3) # reached, then enter the system reset vector of the partially decompressed # image. No warning is issued. 
rm -f "$odir"/{otheros,otheros-too-big}.bld - size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' ' -f1) + size=$(${CROSS}nm --no-sort --radix=d "$ofile" | grep -E ' _end$' | cut -d' ' -f1) bld="otheros.bld" if [ $size -gt $((0x1000000)) ]; then bld="otheros-too-big.bld" -- GitLab From d8a5b59c5fc75c99ba17e3eb1a8f580d8d172b28 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:41 +0100 Subject: [PATCH 389/694] phy: qcom-qmp-combo: fix out-of-bounds clock access The SM8250 only uses three clocks but the DP configuration erroneously described four clocks. In case the DP part of the PHY is initialised before the USB part, this would lead to uninitialised memory beyond the bulk-clocks array to be treated as a clock pointer as the clocks are requested based on the USB configuration. Fixes: aff188feb5e1 ("phy: qcom-qmp: add support for sm8250-usb3-dp phy") Cc: stable@vger.kernel.org # 5.13 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5e11b6a1d1899..bb38b18258ca2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1270,8 +1270,8 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_sm8250_usbphy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, -- GitLab From e965ab8216a419fadb4520b65a95dc7017daa800 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:42 +0100 Subject: [PATCH 390/694] phy: qcom-qmp-combo: fix sdm845 reset The SDM845 has two resets but the DP configuration erroneously described only one. In case the DP part of the PHY is initialised before the USB part (e.g. depending on probe order), then only the first reset would be asserted. Add a dedicated configuration for SDM845 rather than reuse the incompatible SC7180 configuration. 
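For readers unfamiliar with the driver structure, a simplified sketch of why the reset list in the chosen qmp_phy_cfg matters (hypothetical helper; the driver itself goes through the reset_control framework from its shared init path): whichever configuration is used for the lookup decides how many reset lines get requested and toggled at all.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/reset.h>

/* Simplified stand-in for the cfg-driven reset lookup. */
static int qmp_sketch_get_resets(struct device *dev,
				 const char * const *reset_list, int num_resets,
				 struct reset_control **resets)
{
	int i;

	for (i = 0; i < num_resets; i++) {
		resets[i] = devm_reset_control_get_exclusive(dev, reset_list[i]);
		if (IS_ERR(resets[i]))
			return PTR_ERR(resets[i]);
	}

	/* If the DP cfg names only one of SDM845's two resets and happens to
	 * be the cfg used here, the second line is never requested, asserted
	 * or deasserted. */
	return 0;
}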
Fixes: d88497fb6bbd ("phy: qualcomm: phy-qcom-qmp: add support for combo USB3+DP phy on SDM845") Cc: stable@vger.kernel.org # 6.1 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 39 ++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index bb38b18258ca2..cc53e2f991218 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1084,9 +1084,46 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .has_pwrdn_delay = true, }; +static const struct qmp_phy_cfg sdm845_dpphy_cfg = { + .type = PHY_TYPE_DP, + .lanes = 2, + + .serdes_tbl = qmp_v3_dp_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), + .tx_tbl = qmp_v3_dp_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), + + .serdes_tbl_rbr = qmp_v3_dp_serdes_tbl_rbr, + .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr), + .serdes_tbl_hbr = qmp_v3_dp_serdes_tbl_hbr, + .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr), + .serdes_tbl_hbr2 = qmp_v3_dp_serdes_tbl_hbr2, + .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr2), + .serdes_tbl_hbr3 = qmp_v3_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr3), + + .swing_hbr_rbr = &qmp_dp_v3_voltage_swing_hbr_rbr, + .pre_emphasis_hbr_rbr = &qmp_dp_v3_pre_emphasis_hbr_rbr, + .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, + .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, + + .clk_list = qmp_v3_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, + + .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, +}; + static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { .usb_cfg = &sdm845_usb3phy_cfg, - .dp_cfg = &sc7180_dpphy_cfg, + .dp_cfg = &sdm845_dpphy_cfg, }; static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { -- GitLab From 910dd4883d757af5faac92590f33f0f7da963032 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:43 +0100 Subject: [PATCH 391/694] phy: qcom-qmp-combo: fix sc8180x reset The SC8180X has two resets but the DP configuration erroneously described only one. In case the DP part of the PHY is initialised before the USB part (e.g. depending on probe order), then only the first reset would be asserted. 
Fixes: 1633802cd4ac ("phy: qcom: qmp: Add SC8180x USB/DP combo") Cc: stable@vger.kernel.org # 5.15 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index cc53e2f991218..40c25a0ead239 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1177,8 +1177,8 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .clk_list = qmp_v3_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = sc7180_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v3_usb3phy_regs_layout, -- GitLab From 7a7d86d14d073dfa3429c550667a8e78b99edbd4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:44 +0100 Subject: [PATCH 392/694] phy: qcom-qmp-combo: fix broken power on The PHY is powered on during phy-init by setting the SW_PWRDN bit in the COM_POWER_DOWN_CTRL register and then setting the same bit in the in the PCS_POWER_DOWN_CONTROL register that belongs to the USB part of the PHY. Currently, whether power on succeeds depends on probe order and having the USB part of the PHY be initialised first. In case the DP part of the PHY is instead initialised first, the intended power on of the USB block results in a corrupted DP_PHY register (e.g. DP_PHY_AUX_CFG8). Add a pointer to the USB part of the PHY to the driver data and use that to power on the PHY also if the DP part of the PHY is initialised first. Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") Cc: stable@vger.kernel.org # 5.10 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 40c25a0ead239..17707f68d482d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -932,6 +932,7 @@ struct qcom_qmp { struct regulator_bulk_data *vregs; struct qmp_phy **phys; + struct qmp_phy *usb_phy; struct mutex phy_mutex; int init_count; @@ -1911,7 +1912,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) { struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; + struct qmp_phy *usb_phy = qmp->usb_phy; void __iomem *dp_com = qmp->dp_com; int ret; @@ -1963,7 +1964,8 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); + qphy_setbits(usb_phy->pcs, usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + SW_PWRDN); mutex_unlock(&qmp->phy_mutex); @@ -2831,6 +2833,8 @@ static int qmp_combo_probe(struct platform_device *pdev) goto err_node_put; } + qmp->usb_phy = qmp->phys[id]; + /* * Register the pipe clock provided by phy. 
* See function description to see details of this pipe clock. @@ -2846,6 +2850,9 @@ static int qmp_combo_probe(struct platform_device *pdev) id++; } + if (!qmp->usb_phy) + return -EINVAL; + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); -- GitLab From c7b98de745cffdceefc077ad5cf9cda032ef8959 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:45 +0100 Subject: [PATCH 393/694] phy: qcom-qmp-combo: fix runtime suspend Drop the confused runtime-suspend type check which effectively broke runtime PM if the DP child node happens to be parsed before the USB child node during probe (e.g. due to order of child nodes in the devicetree). Instead use the new driver data USB PHY pointer to access the USB configuration and resources. Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 17707f68d482d..fde30205f3323 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2207,15 +2207,11 @@ static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy) static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev) { struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->phys[0]; + struct qmp_phy *qphy = qmp->usb_phy; const struct qmp_phy_cfg *cfg = qphy->cfg; dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode); - /* Supported only for USB3 PHY and luckily USB3 is the first phy */ - if (cfg->type != PHY_TYPE_USB3) - return 0; - if (!qmp->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; @@ -2232,16 +2228,12 @@ static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev) static int __maybe_unused qmp_combo_runtime_resume(struct device *dev) { struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->phys[0]; + struct qmp_phy *qphy = qmp->usb_phy; const struct qmp_phy_cfg *cfg = qphy->cfg; int ret = 0; dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode); - /* Supported only for USB3 PHY and luckily USB3 is the first phy */ - if (cfg->type != PHY_TYPE_USB3) - return 0; - if (!qmp->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; -- GitLab From c209b1b0e1e87e862099482e62a2f2d0bef8e989 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:46 +0100 Subject: [PATCH 394/694] phy: qcom-qmp-combo: clean up common initialisation Commit 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") added support for the DisplayPort part of QMP PHYs but unfortunately did so by duplicating parts of the shared configuration, something which has lead to subtle bugs depending on probe order. As the resources have always been requested based on the USB configuration, make sure to not rely on fields from the DP configuration when using them (e.g. in case they get out of sync) and remove the now unused fields from the DP configurations. 
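The class of bug being removed can be shown in miniature; this is an illustrative toy, not driver code: when two configurations describe the same shared resources, the copies can drift apart, and which one takes effect depends purely on which child happens to be initialised first.

struct sketch_cfg {
	int num_clks;
};

static const struct sketch_cfg usb_cfg = { .num_clks = 3 };
static const struct sketch_cfg dp_cfg  = { .num_clks = 4 };	/* stale duplicate */

/* The shared setup keys off whichever cfg reaches it first. */
static int request_shared_clks(const struct sketch_cfg *first_probed)
{
	return first_probed->num_clks;	/* 3 or 4, depending on probe order */
}

Keying the shared path off the USB configuration alone, as done below, removes the ordering dependence and lets the duplicated fields be dropped from the DP configurations.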
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 47 ++--------------------- 1 file changed, 4 insertions(+), 43 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index fde30205f3323..c7a926d548d8f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1043,14 +1043,6 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = sc7180_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, @@ -1108,14 +1100,6 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, @@ -1176,14 +1160,6 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, @@ -1240,14 +1216,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v4_usb3phy_regs_layout, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, @@ -1308,14 +1276,6 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .clk_list = qmp_v4_sm8250_usbphy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v4_usb3phy_regs_layout, - .dp_aux_init = 
qcom_qmp_v4_phy_dp_aux_init, .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, @@ -1911,8 +1871,8 @@ static int qcom_qmp_dp_phy_calibrate(struct phy *phy) static int qmp_combo_com_init(struct qmp_phy *qphy) { struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; struct qmp_phy *usb_phy = qmp->usb_phy; + const struct qmp_phy_cfg *cfg = usb_phy->cfg; void __iomem *dp_com = qmp->dp_com; int ret; @@ -1964,7 +1924,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - qphy_setbits(usb_phy->pcs, usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_setbits(usb_phy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); mutex_unlock(&qmp->phy_mutex); @@ -1984,7 +1944,8 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) static int qmp_combo_com_exit(struct qmp_phy *qphy) { struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_phy *usb_phy = qmp->usb_phy; + const struct qmp_phy_cfg *cfg = usb_phy->cfg; mutex_lock(&qmp->phy_mutex); if (--qmp->init_count) { -- GitLab From a173ee25a758927adc12664d1ec162a18324a4bd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:00 +0100 Subject: [PATCH 395/694] phy: qcom-qmp-combo: sort device-id table Sort the device-id table by compatible string to make it easier to find and add new entries. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index c7a926d548d8f..d6a031bcfc302 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2651,14 +2651,6 @@ static const struct of_device_id qmp_combo_of_match_table[] = { .compatible = "qcom,sc7180-qmp-usb3-dp-phy", .data = &sc7180_usb3dpphy_cfg, }, - { - .compatible = "qcom,sdm845-qmp-usb3-dp-phy", - .data = &sdm845_usb3dpphy_cfg, - }, - { - .compatible = "qcom,sm8250-qmp-usb3-dp-phy", - .data = &sm8250_usb3dpphy_cfg, - }, { .compatible = "qcom,sc8180x-qmp-usb3-dp-phy", .data = &sc8180x_usb3dpphy_cfg, @@ -2667,6 +2659,14 @@ static const struct of_device_id qmp_combo_of_match_table[] = { .compatible = "qcom,sc8280xp-qmp-usb43dp-phy", .data = &sc8280xp_usb43dpphy_combo_cfg, }, + { + .compatible = "qcom,sdm845-qmp-usb3-dp-phy", + .data = &sdm845_usb3dpphy_cfg, + }, + { + .compatible = "qcom,sm8250-qmp-usb3-dp-phy", + .data = &sm8250_usb3dpphy_cfg, + }, { } }; MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table); -- GitLab From 5c5f9fbc15aa58c1bac22724f429e6e399a2f2b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:01 +0100 Subject: [PATCH 396/694] phy: qcom-qmp-combo: move device-id table Move the device-id table after probe() and next to the driver structure to keep the driver callback functions grouped together. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 50 +++++++++++------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index d6a031bcfc302..e7c8c4417142b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2646,31 +2646,6 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct of_device_id qmp_combo_of_match_table[] = { - { - .compatible = "qcom,sc7180-qmp-usb3-dp-phy", - .data = &sc7180_usb3dpphy_cfg, - }, - { - .compatible = "qcom,sc8180x-qmp-usb3-dp-phy", - .data = &sc8180x_usb3dpphy_cfg, - }, - { - .compatible = "qcom,sc8280xp-qmp-usb43dp-phy", - .data = &sc8280xp_usb43dpphy_combo_cfg, - }, - { - .compatible = "qcom,sdm845-qmp-usb3-dp-phy", - .data = &sdm845_usb3dpphy_cfg, - }, - { - .compatible = "qcom,sm8250-qmp-usb3-dp-phy", - .data = &sm8250_usb3dpphy_cfg, - }, - { } -}; -MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table); - static const struct dev_pm_ops qmp_combo_pm_ops = { SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend, qmp_combo_runtime_resume, NULL) @@ -2815,6 +2790,31 @@ static int qmp_combo_probe(struct platform_device *pdev) return ret; } +static const struct of_device_id qmp_combo_of_match_table[] = { + { + .compatible = "qcom,sc7180-qmp-usb3-dp-phy", + .data = &sc7180_usb3dpphy_cfg, + }, + { + .compatible = "qcom,sc8180x-qmp-usb3-dp-phy", + .data = &sc8180x_usb3dpphy_cfg, + }, + { + .compatible = "qcom,sc8280xp-qmp-usb43dp-phy", + .data = &sc8280xp_usb43dpphy_combo_cfg, + }, + { + .compatible = "qcom,sdm845-qmp-usb3-dp-phy", + .data = &sdm845_usb3dpphy_cfg, + }, + { + .compatible = "qcom,sm8250-qmp-usb3-dp-phy", + .data = &sm8250_usb3dpphy_cfg, + }, + { } +}; +MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table); + static struct platform_driver qmp_combo_driver = { .probe = qmp_combo_probe, .driver = { -- GitLab From 987a505fa7d79691013ec4bd325ecc5664781c81 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:02 +0100 Subject: [PATCH 397/694] phy: qcom-qmp-combo: move pm ops Move the PM ops structure next to the implementation to keep the driver callbacks grouped. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index e7c8c4417142b..2ac29b71d3b72 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2216,6 +2216,11 @@ static int __maybe_unused qmp_combo_runtime_resume(struct device *dev) return 0; } +static const struct dev_pm_ops qmp_combo_pm_ops = { + SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend, + qmp_combo_runtime_resume, NULL) +}; + static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { struct qcom_qmp *qmp = dev_get_drvdata(dev); @@ -2646,11 +2651,6 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, return 0; } -static const struct dev_pm_ops qmp_combo_pm_ops = { - SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend, - qmp_combo_runtime_resume, NULL) -}; - static int qmp_combo_probe(struct platform_device *pdev) { struct qcom_qmp *qmp; -- GitLab From d6c81688f9cd1d198475383c963cd3d9576d29c2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:03 +0100 Subject: [PATCH 398/694] phy: qcom-qmp-combo: rename PHY ops structures Rename the PHY operation structures so that they have a "phy_ops" suffix and move them next to the implementation. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 40 +++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 2ac29b71d3b72..13800c2243e11 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2118,6 +2118,24 @@ static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode) return 0; } +static const struct phy_ops qmp_combo_usb_phy_ops = { + .init = qmp_combo_enable, + .exit = qmp_combo_disable, + .set_mode = qmp_combo_set_mode, + .owner = THIS_MODULE, +}; + +static const struct phy_ops qmp_combo_dp_phy_ops = { + .init = qmp_combo_init, + .configure = qcom_qmp_dp_phy_configure, + .power_on = qmp_combo_power_on, + .calibrate = qcom_qmp_dp_phy_calibrate, + .power_off = qmp_combo_power_off, + .exit = qmp_combo_exit, + .set_mode = qmp_combo_set_mode, + .owner = THIS_MODULE, +}; + static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy) { const struct qmp_phy_cfg *cfg = qphy->cfg; @@ -2542,24 +2560,6 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static const struct phy_ops qmp_combo_usb_ops = { - .init = qmp_combo_enable, - .exit = qmp_combo_disable, - .set_mode = qmp_combo_set_mode, - .owner = THIS_MODULE, -}; - -static const struct phy_ops qmp_combo_dp_ops = { - .init = qmp_combo_init, - .configure = qcom_qmp_dp_phy_configure, - .power_on = qmp_combo_power_on, - .calibrate = qcom_qmp_dp_phy_calibrate, - .power_off = qmp_combo_power_off, - .exit = qmp_combo_exit, - .set_mode = qmp_combo_set_mode, - .owner = THIS_MODULE, -}; - static int qmp_combo_create(struct device *dev, struct device_node *np, int id, void 
__iomem *serdes, const struct qmp_phy_cfg *cfg) { @@ -2632,9 +2632,9 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, } if (cfg->type == PHY_TYPE_DP) - ops = &qmp_combo_dp_ops; + ops = &qmp_combo_dp_phy_ops; else - ops = &qmp_combo_usb_ops; + ops = &qmp_combo_usb_phy_ops; generic_phy = devm_phy_create(dev, np, ops); if (IS_ERR(generic_phy)) { -- GitLab From 73d262f8e7ff095965bac0c4bf538f601257f53b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:04 +0100 Subject: [PATCH 399/694] phy: qcom-qmp-combo: drop unused DP PHY mode op The set-mode operation is currently only used by the USB part of the PHY so drop the corresponding callback from the DP PHY ops. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 13800c2243e11..cec487560fd73 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2132,7 +2132,6 @@ static const struct phy_ops qmp_combo_dp_phy_ops = { .calibrate = qcom_qmp_dp_phy_calibrate, .power_off = qmp_combo_power_off, .exit = qmp_combo_exit, - .set_mode = qmp_combo_set_mode, .owner = THIS_MODULE, }; -- GitLab From ae1cdc709762129c33ab64f400c38c9a177189f1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:05 +0100 Subject: [PATCH 400/694] phy: qcom-qmp-combo: rename USB PHY ops Add a "usb" infix to the USB PHY operation functions and name them after the corresponding operations (e.g. "init" rather than "enable"). Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index cec487560fd73..bf0874d22d917 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2084,7 +2084,7 @@ static int qmp_combo_exit(struct phy *phy) return 0; } -static int qmp_combo_enable(struct phy *phy) +static int qmp_combo_usb_init(struct phy *phy) { int ret; @@ -2099,7 +2099,7 @@ static int qmp_combo_enable(struct phy *phy) return ret; } -static int qmp_combo_disable(struct phy *phy) +static int qmp_combo_usb_exit(struct phy *phy) { int ret; @@ -2109,7 +2109,7 @@ static int qmp_combo_disable(struct phy *phy) return qmp_combo_exit(phy); } -static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode) +static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) { struct qmp_phy *qphy = phy_get_drvdata(phy); @@ -2119,9 +2119,9 @@ static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode) } static const struct phy_ops qmp_combo_usb_phy_ops = { - .init = qmp_combo_enable, - .exit = qmp_combo_disable, - .set_mode = qmp_combo_set_mode, + .init = qmp_combo_usb_init, + .exit = qmp_combo_usb_exit, + .set_mode = qmp_combo_usb_set_mode, .owner = THIS_MODULE, }; -- GitLab From 0537692bbec18a173e1bda87f2bd024b3684b47d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:06 +0100 Subject: [PATCH 401/694] phy: qcom-qmp-combo: drop unnecessary debug message 
Drop the unnecessary (verbose) debug message from the init PHY op. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index bf0874d22d917..ae04f6219fd2a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1967,10 +1967,8 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy) static int qmp_combo_init(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; int ret; - dev_vdbg(qmp->dev, "Initializing QMP phy\n"); ret = qmp_combo_com_init(qphy); if (ret) -- GitLab From 8c75d9eab1dd402bde2fb1337db2130a409c2743 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:07 +0100 Subject: [PATCH 402/694] phy: qcom-qmp-combo: separate USB and DP init ops Separate the USB and DP init and exit operations by calling the common initialisation code directly from the USB operation and adding a "dp" infix to the DP callbacks. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 36 ++++++++++++----------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index ae04f6219fd2a..5d985195df389 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1964,7 +1964,7 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy) return 0; } -static int qmp_combo_init(struct phy *phy) +static int qmp_combo_dp_init(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; @@ -1974,8 +1974,16 @@ static int qmp_combo_init(struct phy *phy) if (ret) return ret; - if (cfg->type == PHY_TYPE_DP) - cfg->dp_aux_init(qphy); + cfg->dp_aux_init(qphy); + + return 0; +} + +static int qmp_combo_dp_exit(struct phy *phy) +{ + struct qmp_phy *qphy = phy_get_drvdata(phy); + + qmp_combo_com_exit(qphy); return 0; } @@ -2073,38 +2081,32 @@ static int qmp_combo_power_off(struct phy *phy) return 0; } -static int qmp_combo_exit(struct phy *phy) -{ - struct qmp_phy *qphy = phy_get_drvdata(phy); - - qmp_combo_com_exit(qphy); - - return 0; -} - static int qmp_combo_usb_init(struct phy *phy) { + struct qmp_phy *qphy = phy_get_drvdata(phy); int ret; - ret = qmp_combo_init(phy); + ret = qmp_combo_com_init(qphy); if (ret) return ret; ret = qmp_combo_power_on(phy); if (ret) - qmp_combo_exit(phy); + qmp_combo_com_exit(qphy); return ret; } static int qmp_combo_usb_exit(struct phy *phy) { + struct qmp_phy *qphy = phy_get_drvdata(phy); int ret; ret = qmp_combo_power_off(phy); if (ret) return ret; - return qmp_combo_exit(phy); + + return qmp_combo_com_exit(qphy); } static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) @@ -2124,12 +2126,12 @@ static const struct phy_ops qmp_combo_usb_phy_ops = { }; static const struct phy_ops qmp_combo_dp_phy_ops = { - .init = qmp_combo_init, + .init = qmp_combo_dp_init, .configure = qcom_qmp_dp_phy_configure, .power_on = qmp_combo_power_on, .calibrate = qcom_qmp_dp_phy_calibrate, .power_off = 
qmp_combo_power_off, - .exit = qmp_combo_exit, + .exit = qmp_combo_dp_exit, .owner = THIS_MODULE, }; -- GitLab From 186266f65e68c16ba6714c9b7f561ddcd4998cae Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:08 +0100 Subject: [PATCH 403/694] phy: qcom-qmp-combo: rename DP PHY ops Rename the configure and calibrate DP PHY ops using the common prefix for consistency. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5d985195df389..7392ae460fd73 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1842,7 +1842,7 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy) return 0; } -static int qcom_qmp_dp_phy_configure(struct phy *phy, union phy_configure_opts *opts) +static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opts) { const struct phy_configure_opts_dp *dp_opts = &opts->dp; struct qmp_phy *qphy = phy_get_drvdata(phy); @@ -1857,7 +1857,7 @@ static int qcom_qmp_dp_phy_configure(struct phy *phy, union phy_configure_opts * return 0; } -static int qcom_qmp_dp_phy_calibrate(struct phy *phy) +static int qmp_combo_dp_calibrate(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; @@ -2127,9 +2127,9 @@ static const struct phy_ops qmp_combo_usb_phy_ops = { static const struct phy_ops qmp_combo_dp_phy_ops = { .init = qmp_combo_dp_init, - .configure = qcom_qmp_dp_phy_configure, + .configure = qmp_combo_dp_configure, .power_on = qmp_combo_power_on, - .calibrate = qcom_qmp_dp_phy_calibrate, + .calibrate = qmp_combo_dp_calibrate, .power_off = qmp_combo_power_off, .exit = qmp_combo_dp_exit, .owner = THIS_MODULE, -- GitLab From 3ade3ede57a0093da3b432ecceda36386d13a5e5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:09 +0100 Subject: [PATCH 404/694] phy: qcom-qmp-combo: separate USB and DP power-on ops Separate the USB and DP power-on and power-off operations in two dedicated implementations. Note that the pipe clock is only used by the USB part of the PHY and that no DP configuration has a pcs (or rx) table or has has_pwrdn_delay set. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 100 +++++++++++++--------- 1 file changed, 59 insertions(+), 41 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7392ae460fd73..748fd32a6f728 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1988,7 +1988,39 @@ static int qmp_combo_dp_exit(struct phy *phy) return 0; } -static int qmp_combo_power_on(struct phy *phy) +static int qmp_combo_dp_power_on(struct phy *phy) +{ + struct qmp_phy *qphy = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *tx = qphy->tx; + + qmp_combo_serdes_init(qphy); + + qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); + + if (cfg->lanes >= 2) + qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + + /* Configure special DP tx tunings */ + cfg->configure_dp_tx(qphy); + + /* Configure link rate, swing, etc. */ + cfg->configure_dp_phy(qphy); + + return 0; +} + +static int qmp_combo_dp_power_off(struct phy *phy) +{ + struct qmp_phy *qphy = phy_get_drvdata(phy); + + /* Assert DP PHY power down */ + writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL); + + return 0; +} + +static int qmp_combo_usb_power_on(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; @@ -2014,39 +2046,30 @@ static int qmp_combo_power_on(struct phy *phy) if (cfg->lanes >= 2) qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); - /* Configure special DP tx tunings */ - if (cfg->type == PHY_TYPE_DP) - cfg->configure_dp_tx(qphy); - qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); if (cfg->lanes >= 2) qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); - /* Configure link rate, swing, etc. 
*/ - if (cfg->type == PHY_TYPE_DP) - cfg->configure_dp_phy(qphy); - else - qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); + qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); if (cfg->has_pwrdn_delay) usleep_range(10, 20); - if (cfg->type != PHY_TYPE_DP) { - /* Pull PHY out of reset state */ - qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - /* start SerDes and Phy-Coding-Sublayer */ - qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], - SERDES_START | PCS_START); + /* Pull PHY out of reset state */ + qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - status = pcs + cfg->regs[QPHY_PCS_STATUS]; - ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, - PHY_INIT_COMPLETE_TIMEOUT); - if (ret) { - dev_err(qmp->dev, "phy initialization timed-out\n"); - goto err_disable_pipe_clk; - } + /* start SerDes and Phy-Coding-Sublayer */ + qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); + + status = pcs + cfg->regs[QPHY_PCS_STATUS]; + ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200, + PHY_INIT_COMPLETE_TIMEOUT); + if (ret) { + dev_err(qmp->dev, "phy initialization timed-out\n"); + goto err_disable_pipe_clk; } + return 0; err_disable_pipe_clk: @@ -2055,28 +2078,23 @@ static int qmp_combo_power_on(struct phy *phy) return ret; } -static int qmp_combo_power_off(struct phy *phy) +static int qmp_combo_usb_power_off(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; clk_disable_unprepare(qphy->pipe_clk); - if (cfg->type == PHY_TYPE_DP) { - /* Assert DP PHY power down */ - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL); - } else { - /* PHY reset */ - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + /* PHY reset */ + qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); - /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], - SERDES_START | PCS_START); + /* stop SerDes and Phy-Coding-Sublayer */ + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + SERDES_START | PCS_START); - /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - SW_PWRDN); - } + /* Put PHY into POWER DOWN state: active low */ + qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + SW_PWRDN); return 0; } @@ -2090,7 +2108,7 @@ static int qmp_combo_usb_init(struct phy *phy) if (ret) return ret; - ret = qmp_combo_power_on(phy); + ret = qmp_combo_usb_power_on(phy); if (ret) qmp_combo_com_exit(qphy); @@ -2102,7 +2120,7 @@ static int qmp_combo_usb_exit(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); int ret; - ret = qmp_combo_power_off(phy); + ret = qmp_combo_usb_power_off(phy); if (ret) return ret; @@ -2128,9 +2146,9 @@ static const struct phy_ops qmp_combo_usb_phy_ops = { static const struct phy_ops qmp_combo_dp_phy_ops = { .init = qmp_combo_dp_init, .configure = qmp_combo_dp_configure, - .power_on = qmp_combo_power_on, + .power_on = qmp_combo_dp_power_on, .calibrate = qmp_combo_dp_calibrate, - .power_off = qmp_combo_power_off, + .power_off = qmp_combo_dp_power_off, .exit = qmp_combo_dp_exit, .owner = THIS_MODULE, }; -- GitLab From dae95d7f667d20ab81976f846ed0bf2c71dbfe18 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:10 +0100 Subject: [PATCH 405/694] phy: qcom-qmp-combo: clean up serdes initialisation Clean up serdes initialisation somewhat by making the current helper a dedicated helper for the DP part of the PHY. 
Note that no error is currently returned for non-supported link rates. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 49 +++++++++++------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 748fd32a6f728..c059e4aeecdbf 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1313,7 +1313,7 @@ static void qmp_combo_configure(void __iomem *base, qmp_combo_configure_lane(base, tbl, num, 0xff); } -static int qmp_combo_serdes_init(struct qmp_phy *qphy) +static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) { const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *serdes = qphy->serdes; @@ -1323,28 +1323,26 @@ static int qmp_combo_serdes_init(struct qmp_phy *qphy) qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num); - if (cfg->type == PHY_TYPE_DP) { - switch (dp_opts->link_rate) { - case 1620: - qmp_combo_configure(serdes, cfg->serdes_tbl_rbr, - cfg->serdes_tbl_rbr_num); - break; - case 2700: - qmp_combo_configure(serdes, cfg->serdes_tbl_hbr, - cfg->serdes_tbl_hbr_num); - break; - case 5400: - qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2, - cfg->serdes_tbl_hbr2_num); - break; - case 8100: - qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3, - cfg->serdes_tbl_hbr3_num); - break; - default: - /* Other link rates aren't supported */ - return -EINVAL; - } + switch (dp_opts->link_rate) { + case 1620: + qmp_combo_configure(serdes, cfg->serdes_tbl_rbr, + cfg->serdes_tbl_rbr_num); + break; + case 2700: + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr, + cfg->serdes_tbl_hbr_num); + break; + case 5400: + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2, + cfg->serdes_tbl_hbr2_num); + break; + case 8100: + qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3, + cfg->serdes_tbl_hbr3_num); + break; + default: + /* Other link rates aren't supported */ + return -EINVAL; } return 0; @@ -1994,7 +1992,7 @@ static int qmp_combo_dp_power_on(struct phy *phy) const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *tx = qphy->tx; - qmp_combo_serdes_init(qphy); + qmp_combo_dp_serdes_init(qphy); qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); @@ -2025,6 +2023,7 @@ static int qmp_combo_usb_power_on(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *serdes = qphy->serdes; void __iomem *tx = qphy->tx; void __iomem *rx = qphy->rx; void __iomem *pcs = qphy->pcs; @@ -2032,7 +2031,7 @@ static int qmp_combo_usb_power_on(struct phy *phy) unsigned int val; int ret; - qmp_combo_serdes_init(qphy); + qmp_combo_configure(serdes, cfg->serdes_tbl, cfg->serdes_tbl_num); ret = clk_prepare_enable(qphy->pipe_clk); if (ret) { -- GitLab From c7fbe5bd14145425d38a3b1e4d59f1b3acff3eba Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:11 +0100 Subject: [PATCH 406/694] phy: qcom-qmp-combo: separate USB and DP devicetree parsing Separate the devicetree parsing of the USB and DP child nodes in two dedicated helpers in preparation for merging the driver data. Note that only the USB part of the PHY has a pipe clock and that the DP implementation only uses the tx/tx2 and pcs register regions. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 82 ++++++++++++++++------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index c059e4aeecdbf..9c4528dff3165 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2576,13 +2576,12 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_combo_create(struct device *dev, struct device_node *np, int id, +static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int id, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { struct qcom_qmp *qmp = dev_get_drvdata(dev); struct phy *generic_phy; struct qmp_phy *qphy; - const struct phy_ops *ops; int ret; qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); @@ -2592,7 +2591,57 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, qphy->cfg = cfg; qphy->serdes = serdes; /* - * Get memory resources for each PHY: + * Get memory resources from the DP child node: + * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. + * For dual lane PHYs: tx2 -> 3, rx2 -> 4 + * + * Note that only tx/tx2 and pcs are used by the DP implementation. + */ + qphy->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qphy->tx)) + return PTR_ERR(qphy->tx); + + qphy->pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qphy->pcs)) + return PTR_ERR(qphy->pcs); + + if (cfg->lanes >= 2) { + qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qphy->tx2)) + return PTR_ERR(qphy->tx2); + } + + generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); + if (IS_ERR(generic_phy)) { + ret = PTR_ERR(generic_phy); + dev_err(dev, "failed to create DP PHY: %d\n", ret); + return ret; + } + + qphy->phy = generic_phy; + qphy->qmp = qmp; + qmp->phys[id] = qphy; + phy_set_drvdata(generic_phy, qphy); + + return 0; +} + +static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int id, + void __iomem *serdes, const struct qmp_phy_cfg *cfg) +{ + struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct phy *generic_phy; + struct qmp_phy *qphy; + int ret; + + qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); + if (!qphy) + return -ENOMEM; + + qphy->cfg = cfg; + qphy->serdes = serdes; + /* + * Get memory resources from the USB child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 * For single lane PHYs: pcs_misc (optional) -> 3. @@ -2631,31 +2680,16 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id, qphy->pcs_misc = NULL; } - /* - * Get PHY's Pipe clock, if any. USB3 and PCIe are PIPE3 - * based phys, so they essentially have pipe clock. So, - * we return error in case phy is USB3 or PIPE type. - * Otherwise, we initialize pipe clock to NULL for - * all phys that don't need this. 
- */ qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL); if (IS_ERR(qphy->pipe_clk)) { - if (cfg->type == PHY_TYPE_USB3) - return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), - "failed to get lane%d pipe_clk\n", - id); - qphy->pipe_clk = NULL; + return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), + "failed to get lane%d pipe_clk\n", id); } - if (cfg->type == PHY_TYPE_DP) - ops = &qmp_combo_dp_phy_ops; - else - ops = &qmp_combo_usb_phy_ops; - - generic_phy = devm_phy_create(dev, np, ops); + generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops); if (IS_ERR(generic_phy)) { ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create qphy %d\n", ret); + dev_err(dev, "failed to create USB PHY: %d\n", ret); return ret; } @@ -2752,7 +2786,7 @@ static int qmp_combo_probe(struct platform_device *pdev) serdes = dp_serdes; /* Create per-lane phy */ - ret = qmp_combo_create(dev, child, id, serdes, cfg); + ret = qmp_combo_create_dp(dev, child, id, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); @@ -2770,7 +2804,7 @@ static int qmp_combo_probe(struct platform_device *pdev) serdes = usb_serdes; /* Create per-lane phy */ - ret = qmp_combo_create(dev, child, id, serdes, cfg); + ret = qmp_combo_create_usb(dev, child, id, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); -- GitLab From 4197a2a22df7804b40335ab638eae211acd1a81b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:12 +0100 Subject: [PATCH 407/694] phy: qcom-qmp-combo: add dedicated DP iomem pointers In preparation for merging the driver data, add separate iomem pointers for the DP part of the PHY. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 246 +++++++++++----------- 1 file changed, 126 insertions(+), 120 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9c4528dff3165..43193bfe6e11d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -895,6 +895,12 @@ struct qmp_phy { void __iomem *rx2; void __iomem *pcs_misc; void __iomem *pcs_usb; + + void __iomem *dp_serdes; + void __iomem *dp_tx; + void __iomem *dp_tx2; + void __iomem *dp_pcs; + struct clk *pipe_clk; struct qcom_qmp *qmp; enum phy_mode mode; @@ -1316,7 +1322,7 @@ static void qmp_combo_configure(void __iomem *base, static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) { const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->serdes; + void __iomem *serdes = qphy->dp_serdes; const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; int serdes_tbl_num = cfg->serdes_tbl_num; @@ -1352,43 +1358,43 @@ static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->pcs + QSERDES_DP_PHY_PD_CTL); + qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL, - qphy->serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); + qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, 
qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_LANE_0_1_PWRDN | DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->pcs + QSERDES_DP_PHY_PD_CTL); + qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL | QSERDES_V3_COM_CLKBUF_RX_DRIVE_L, - qphy->serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - - writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0x24, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qphy->pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xbb, qphy->pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG9); + qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); + + writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0x24, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); + writel(0xbb, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); qphy->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qphy->pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); + qphy->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); } static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy, @@ -1421,10 +1427,10 @@ static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy, voltage_swing_cfg |= DP_PHY_TXn_TX_DRV_LVL_MUX_EN; pre_emphasis_cfg |= DP_PHY_TXn_TX_EMP_POST1_LVL_MUX_EN; - writel(voltage_swing_cfg, qphy->tx + drv_lvl_reg); - writel(pre_emphasis_cfg, qphy->tx + emp_post_reg); - writel(voltage_swing_cfg, qphy->tx2 + drv_lvl_reg); - writel(pre_emphasis_cfg, qphy->tx2 + emp_post_reg); + writel(voltage_swing_cfg, qphy->dp_tx + drv_lvl_reg); + writel(pre_emphasis_cfg, qphy->dp_tx + emp_post_reg); + writel(voltage_swing_cfg, qphy->dp_tx2 + drv_lvl_reg); + writel(pre_emphasis_cfg, qphy->dp_tx2 + emp_post_reg); return 0; } @@ -1446,10 +1452,10 @@ static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy) drvr_en = 0x10; } - writel(drvr_en, qphy->tx + QSERDES_V3_TX_HIGHZ_DRVR_EN); - writel(bias_en, qphy->tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); - writel(drvr_en, qphy->tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN); - writel(bias_en, qphy->tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); + writel(drvr_en, qphy->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN); + writel(bias_en, qphy->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); + writel(drvr_en, qphy->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN); + writel(bias_en, qphy->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); } static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy) @@ -1472,12 +1478,12 @@ static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy) * if (lane_cnt == 4 || orientation == ORIENTATION_CC1) * val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; * if (orientation == ORIENTATION_CC2) - * 
writel(0x4c, qphy->pcs + QSERDES_V3_DP_PHY_MODE); + * writel(0x4c, qphy->dp_pcs + QSERDES_V3_DP_PHY_MODE); */ val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; - writel(val, qphy->pcs + QSERDES_DP_PHY_PD_CTL); + writel(val, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); - writel(0x5c, qphy->pcs + QSERDES_DP_PHY_MODE); + writel(0x5c, qphy->dp_pcs + QSERDES_DP_PHY_MODE); return reverse; } @@ -1491,8 +1497,8 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) qmp_combo_configure_dp_mode(qphy); - writel(0x05, qphy->pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qphy->pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1515,40 +1521,40 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qphy->pcs + QSERDES_V3_DP_PHY_VCO_DIV); + writel(phy_vco_div, qphy->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); - writel(0x04, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x04, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x20, qphy->serdes + QSERDES_V3_COM_RESETSM_CNTRL); + writel(0x20, qphy->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL); - if (readl_poll_timeout(qphy->serdes + QSERDES_V3_COM_C_READY_STATUS, + if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V3_COM_C_READY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->pcs + QSERDES_V3_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - return readl_poll_timeout(qphy->pcs + QSERDES_V3_DP_PHY_STATUS, + return readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1568,7 +1574,7 @@ static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy) qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); val = cfg1_settings[qphy->dp_aux_cfg]; - writel(val, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1577,37 +1583,37 @@ static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->pcs + QSERDES_DP_PHY_PD_CTL); + qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ - writel(0x17, qphy->serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); - - writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qphy->pcs + 
QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qphy->pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xb7, qphy->pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG9); + writel(0x17, qphy->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); + + writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); + writel(0xb7, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); qphy->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qphy->pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); + qphy->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); } static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy) { /* Program default values before writing proper values */ - writel(0x27, qphy->tx + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x27, qphy->tx2 + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x20, qphy->tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); qmp_combo_configure_dp_swing(qphy, QSERDES_V4_TX_TX_DRV_LVL, QSERDES_V4_TX_TX_EMP_POST1_LVL); @@ -1620,15 +1626,15 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) u32 phy_vco_div, status; unsigned long pixel_freq; - writel(0x0f, qphy->pcs + QSERDES_V4_DP_PHY_CFG_1); + writel(0x0f, qphy->dp_pcs + QSERDES_V4_DP_PHY_CFG_1); qmp_combo_configure_dp_mode(qphy); - writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x05, qphy->pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qphy->pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1651,49 +1657,49 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qphy->pcs + QSERDES_V4_DP_PHY_VCO_DIV); + writel(phy_vco_div, qphy->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); - writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x09, qphy->dp_pcs + 
QSERDES_DP_PHY_CFG); - writel(0x20, qphy->serdes + QSERDES_V4_COM_RESETSM_CNTRL); + writel(0x20, qphy->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL); - if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_C_READY_STATUS, + if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_C_READY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_CMN_STATUS, + if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_CMN_STATUS, + if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1737,30 +1743,30 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy) drvr1_en = 0x10; } - writel(drvr0_en, qphy->tx + QSERDES_V4_TX_HIGHZ_DRVR_EN); - writel(bias0_en, qphy->tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(drvr1_en, qphy->tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); - writel(bias1_en, qphy->tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); + writel(drvr0_en, qphy->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN); + writel(bias0_en, qphy->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); + writel(drvr1_en, qphy->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); + writel(bias1_en, qphy->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x0a, qphy->tx + QSERDES_V4_TX_TX_POL_INV); - writel(0x0a, qphy->tx2 + QSERDES_V4_TX_TX_POL_INV); + writel(0x0a, qphy->dp_tx + QSERDES_V4_TX_TX_POL_INV); + writel(0x0a, qphy->dp_tx2 + QSERDES_V4_TX_TX_POL_INV); - writel(0x27, qphy->tx + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x27, qphy->tx2 + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x20, qphy->tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); return 0; } @@ -1794,30 +1800,30 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy) drvr1_en = 0x10; } - writel(drvr0_en, qphy->tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); - writel(bias0_en, qphy->tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(drvr1_en, qphy->tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); - writel(bias1_en, qphy->tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); + writel(drvr0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); + writel(bias0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); + writel(drvr1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); + 
writel(bias1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x0a, qphy->tx + QSERDES_V5_5NM_TX_TX_POL_INV); - writel(0x0a, qphy->tx2 + QSERDES_V5_5NM_TX_TX_POL_INV); + writel(0x0a, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV); + writel(0x0a, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV); - writel(0x27, qphy->tx + QSERDES_V5_5NM_TX_TX_DRV_LVL); - writel(0x27, qphy->tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL); + writel(0x27, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL); - writel(0x20, qphy->tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); + writel(0x20, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); return 0; } @@ -1835,7 +1841,7 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy) qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); val = cfg1_settings[qphy->dp_aux_cfg]; - writel(val, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1990,14 +1996,14 @@ static int qmp_combo_dp_power_on(struct phy *phy) { struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->tx; + void __iomem *tx = qphy->dp_tx; qmp_combo_dp_serdes_init(qphy); qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_combo_configure_lane(qphy->dp_tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); /* Configure special DP tx tunings */ cfg->configure_dp_tx(qphy); @@ -2013,7 +2019,7 @@ static int qmp_combo_dp_power_off(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); /* Assert DP PHY power down */ - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); return 0; } @@ -2589,7 +2595,7 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i return -ENOMEM; qphy->cfg = cfg; - qphy->serdes = serdes; + qphy->dp_serdes = serdes; /* * Get memory resources from the DP child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. @@ -2597,18 +2603,18 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i * * Note that only tx/tx2 and pcs are used by the DP implementation. 
*/ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qphy->dp_tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qphy->dp_tx)) + return PTR_ERR(qphy->dp_tx); - qphy->pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qphy->dp_pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qphy->dp_pcs)) + return PTR_ERR(qphy->dp_pcs); if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qphy->dp_tx2)) + return PTR_ERR(qphy->dp_tx2); } generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); -- GitLab From bc8615888f3e4dc8f3448b6b4f8dec04b8b5bce2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:13 +0100 Subject: [PATCH 408/694] phy: qcom-qmp-combo: clean up DP configurations In preparation for merging the USB and DP configurations, align the initialisations of the DP function pointers. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-15-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 40 +++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 43193bfe6e11d..50c011d23a9e1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1049,10 +1049,10 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = { @@ -1106,10 +1106,10 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { @@ -1166,10 +1166,10 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, }; static const struct 
qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = { @@ -1222,10 +1222,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_combo_cfg = { @@ -1282,10 +1282,10 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, + .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, + .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, + .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, }; static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = { -- GitLab From ad4db91d60636c2c28487c3f518eab5952511923 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:14 +0100 Subject: [PATCH 409/694] phy: qcom-qmp-combo: rename sc8280xp config In preparation for merging the USB and DP configurations, drop the "combo" infix from the SC8280XP combined configuration for consistency with the other platforms. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-16-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 50c011d23a9e1..d3fd6bde4af58 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -1228,7 +1228,7 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, }; -static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_combo_cfg = { +static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = { .usb_cfg = &sc8280xp_usb43dp_usb_cfg, .dp_cfg = &sc8280xp_usb43dp_dp_cfg, }; @@ -2857,7 +2857,7 @@ static const struct of_device_id qmp_combo_of_match_table[] = { }, { .compatible = "qcom,sc8280xp-qmp-usb43dp-phy", - .data = &sc8280xp_usb43dpphy_combo_cfg, + .data = &sc8280xp_usb43dpphy_cfg, }, { .compatible = "qcom,sdm845-qmp-usb3-dp-phy", -- GitLab From 488f116de075f2fd0cb90205a76e2ca0756efaff Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:15 +0100 Subject: [PATCH 410/694] phy: qcom-qmp-combo: add DP configuration tables In preparation for merging the USB and DP configurations, add dedicated pointers for the DP serdes and tx tables to the configurations. 
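A simplified sketch of where this ends up (not the full qmp_phy_cfg, only the table pointers relevant here, and the struct name is invented for illustration): the USB tables keep their existing fields while the DP tables gain dedicated dp_* counterparts, so a single config can later describe both halves of the combo PHY.

struct qmp_phy_init_tbl;

struct example_combo_tables {
        /* USB part, unchanged */
        const struct qmp_phy_init_tbl *serdes_tbl;
        int serdes_tbl_num;
        const struct qmp_phy_init_tbl *tx_tbl;
        int tx_tbl_num;

        /* DP part, new dedicated pointers */
        const struct qmp_phy_init_tbl *dp_serdes_tbl;
        int dp_serdes_tbl_num;
        const struct qmp_phy_init_tbl *dp_tx_tbl;
        int dp_tx_tbl_num;
};
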
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-17-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 45 ++++++++++++----------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index d3fd6bde4af58..2588cfa5e81e1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -816,6 +816,11 @@ struct qmp_phy_cfg { const struct qmp_phy_init_tbl *pcs_usb_tbl; int pcs_usb_tbl_num; + const struct qmp_phy_init_tbl *dp_serdes_tbl; + int dp_serdes_tbl_num; + const struct qmp_phy_init_tbl *dp_tx_tbl; + int dp_tx_tbl_num; + /* Init sequence for DP PHY block link rates */ const struct qmp_phy_init_tbl *serdes_tbl_rbr; int serdes_tbl_rbr_num; @@ -1030,10 +1035,10 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .type = PHY_TYPE_DP, .lanes = 2, - .serdes_tbl = qmp_v3_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), - .tx_tbl = qmp_v3_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), + .dp_serdes_tbl = qmp_v3_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), + .dp_tx_tbl = qmp_v3_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), .serdes_tbl_rbr = qmp_v3_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr), @@ -1147,10 +1152,10 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .type = PHY_TYPE_DP, .lanes = 2, - .serdes_tbl = qmp_v4_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), - .tx_tbl = qmp_v4_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl), + .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), + .dp_tx_tbl = qmp_v4_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl), .serdes_tbl_rbr = qmp_v4_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr), @@ -1203,10 +1208,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .type = PHY_TYPE_DP, .lanes = 2, - .serdes_tbl = qmp_v5_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v5_dp_serdes_tbl), - .tx_tbl = qmp_v5_5nm_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v5_5nm_dp_tx_tbl), + .dp_serdes_tbl = qmp_v5_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v5_dp_serdes_tbl), + .dp_tx_tbl = qmp_v5_5nm_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v5_5nm_dp_tx_tbl), .serdes_tbl_rbr = qmp_v4_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr), @@ -1263,10 +1268,10 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .type = PHY_TYPE_DP, .lanes = 2, - .serdes_tbl = qmp_v4_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), - .tx_tbl = qmp_v4_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl), + .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), + .dp_tx_tbl = qmp_v4_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v4_dp_tx_tbl), .serdes_tbl_rbr = qmp_v4_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr), @@ -1324,10 +1329,8 @@ static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *serdes = qphy->dp_serdes; const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; - const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl; - int serdes_tbl_num = cfg->serdes_tbl_num; - qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num); 
+ qmp_combo_configure(serdes, cfg->dp_serdes_tbl, cfg->dp_serdes_tbl_num); switch (dp_opts->link_rate) { case 1620: @@ -2000,10 +2003,10 @@ static int qmp_combo_dp_power_on(struct phy *phy) qmp_combo_dp_serdes_init(qphy); - qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); + qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1); if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->dp_tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_combo_configure_lane(qphy->dp_tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2); /* Configure special DP tx tunings */ cfg->configure_dp_tx(qphy); -- GitLab From ba0af7b346db8149e33a2f6e1a7b8265cabbfacb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:16 +0100 Subject: [PATCH 411/694] phy: qcom-qmp-combo: drop lanes config parameter Since the QMP driver split there is really no need for the 'lanes' configuration parameter as all of these USB-C PHYs support dual-lane SuperSpeed USB and quad-lane (uni-directional) DP (even if the driver still only supports CC1 orientation using lanes 2 and 3). Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221114110621.4639-18-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 62 ++++++++--------------- 1 file changed, 20 insertions(+), 42 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 2588cfa5e81e1..a0abeb7c3bcad 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -802,7 +802,6 @@ struct qmp_phy; struct qmp_phy_cfg { /* phy-type - PCIE/UFS/USB */ unsigned int type; - int lanes; /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; @@ -1010,7 +1009,6 @@ static const char * const sc7180_usb3phy_reset_l[] = { static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), @@ -1033,7 +1031,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .dp_serdes_tbl = qmp_v3_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), @@ -1067,7 +1064,6 @@ static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = { static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), @@ -1090,7 +1086,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { static const struct qmp_phy_cfg sdm845_dpphy_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .serdes_tbl = qmp_v3_dp_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), @@ -1124,7 +1119,6 @@ static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = sm8150_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sm8150_usb3_serdes_tbl), @@ -1150,7 +1144,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), @@ -1184,7 +1177,6 @@ static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = { static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .type = 
PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = sc8280xp_usb43dp_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl), @@ -1206,7 +1198,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .dp_serdes_tbl = qmp_v5_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v5_dp_serdes_tbl), @@ -1240,7 +1231,6 @@ static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = { static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .type = PHY_TYPE_USB3, - .lanes = 2, .serdes_tbl = sm8150_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sm8150_usb3_serdes_tbl), @@ -1266,7 +1256,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .type = PHY_TYPE_DP, - .lanes = 2, .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), @@ -2000,13 +1989,12 @@ static int qmp_combo_dp_power_on(struct phy *phy) struct qmp_phy *qphy = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qphy->cfg; void __iomem *tx = qphy->dp_tx; + void __iomem *tx2 = qphy->dp_tx2; qmp_combo_dp_serdes_init(qphy); qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->dp_tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2); + qmp_combo_configure_lane(tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2); /* Configure special DP tx tunings */ cfg->configure_dp_tx(qphy); @@ -2035,6 +2023,8 @@ static int qmp_combo_usb_power_on(struct phy *phy) void __iomem *serdes = qphy->serdes; void __iomem *tx = qphy->tx; void __iomem *rx = qphy->rx; + void __iomem *tx2 = qphy->tx2; + void __iomem *rx2 = qphy->rx2; void __iomem *pcs = qphy->pcs; void __iomem *status; unsigned int val; @@ -2050,14 +2040,10 @@ static int qmp_combo_usb_power_on(struct phy *phy) /* Tx, Rx, and PCS configurations */ qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); + qmp_combo_configure_lane(tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2); qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1); - - if (cfg->lanes >= 2) - qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); + qmp_combo_configure_lane(rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2); qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); @@ -2601,8 +2587,8 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i qphy->dp_serdes = serdes; /* * Get memory resources from the DP child node: - * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. - * For dual lane PHYs: tx2 -> 3, rx2 -> 4 + * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; + * tx2 -> 3; rx2 -> 4 * * Note that only tx/tx2 and pcs are used by the DP implementation. 
*/ @@ -2614,11 +2600,9 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i if (IS_ERR(qphy->dp_pcs)) return PTR_ERR(qphy->dp_pcs); - if (cfg->lanes >= 2) { - qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->dp_tx2)) - return PTR_ERR(qphy->dp_tx2); - } + qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qphy->dp_tx2)) + return PTR_ERR(qphy->dp_tx2); generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); if (IS_ERR(generic_phy)) { @@ -2651,9 +2635,8 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int qphy->serdes = serdes; /* * Get memory resources from the USB child node: - * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2. - * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 - * For single lane PHYs: pcs_misc (optional) -> 3. + * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; + * tx2 -> 3; rx2 -> 4; pcs_misc (optional) -> 5 */ qphy->tx = devm_of_iomap(dev, np, 0, NULL); if (IS_ERR(qphy->tx)) @@ -2670,20 +2653,15 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int if (cfg->pcs_usb_offset) qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset; - if (cfg->lanes >= 2) { - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qphy->tx2)) + return PTR_ERR(qphy->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); - - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); - } else { - qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); - } + qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qphy->rx2)) + return PTR_ERR(qphy->rx2); + qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); if (IS_ERR(qphy->pcs_misc)) { dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); qphy->pcs_misc = NULL; -- GitLab From 9e62877eefacecdcd0467cfeb6bcd20786465f9b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:17 +0100 Subject: [PATCH 412/694] phy: qcom-qmp-combo: merge USB and DP configurations It does not really make any sense to keep separate configuration structures for the USB and DP parts of the same PHY so merge them. 
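As a rough sketch of the effect on probe (function name trimmed and most of the real probe omitted, so this is illustrative rather than the actual code): the OF match data becomes a single qmp_phy_cfg instead of a qmp_phy_combo_cfg wrapper holding separate usb_cfg and dp_cfg pointers.

#include <linux/errno.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>

struct qmp_phy_cfg;

/* Illustrative only; the real probe does much more than this. */
static int example_combo_probe(struct platform_device *pdev)
{
        const struct qmp_phy_cfg *cfg;

        cfg = of_device_get_match_data(&pdev->dev);
        if (!cfg)
                return -EINVAL;

        /*
         * The same cfg now drives both halves: cfg->serdes_tbl and
         * friends for USB, cfg->dp_serdes_tbl and friends for DP.
         */
        return 0;
}
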
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-19-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 182 +++++++--------------- 1 file changed, 57 insertions(+), 125 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index a0abeb7c3bcad..298477259ee62 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -798,11 +798,7 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = { struct qmp_phy; -/* struct qmp_phy_cfg - per-PHY initialization config */ struct qmp_phy_cfg { - /* phy-type - PCIE/UFS/USB */ - unsigned int type; - /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; int serdes_tbl_num; @@ -863,11 +859,6 @@ struct qmp_phy_cfg { }; -struct qmp_phy_combo_cfg { - const struct qmp_phy_cfg *usb_cfg; - const struct qmp_phy_cfg *dp_cfg; -}; - /** * struct qmp_phy - per-lane phy descriptor * @@ -1007,9 +998,7 @@ static const char * const sc7180_usb3phy_reset_l[] = { "phy", }; -static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { - .type = PHY_TYPE_USB3, - +static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = { .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), .tx_tbl = qmp_v3_usb3_tx_tbl, @@ -1018,19 +1007,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = { .rx_tbl_num = ARRAY_SIZE(qmp_v3_usb3_rx_tbl), .pcs_tbl = qmp_v3_usb3_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(qmp_v3_usb3_pcs_tbl), - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = sc7180_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - - .has_pwrdn_delay = true, -}; - -static const struct qmp_phy_cfg sc7180_dpphy_cfg = { - .type = PHY_TYPE_DP, .dp_serdes_tbl = qmp_v3_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), @@ -1055,16 +1031,19 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, -}; -static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = { - .usb_cfg = &sc7180_usb3phy_cfg, - .dp_cfg = &sc7180_dpphy_cfg, -}; + .clk_list = qmp_v3_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), + .reset_list = sc7180_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, -static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { - .type = PHY_TYPE_USB3, + .has_pwrdn_delay = true, +}; +static const struct qmp_phy_cfg sdm845_usb3dpphy_cfg = { .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), .tx_tbl = qmp_v3_usb3_tx_tbl, @@ -1073,24 +1052,11 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = { .rx_tbl_num = ARRAY_SIZE(qmp_v3_usb3_rx_tbl), .pcs_tbl = qmp_v3_usb3_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(qmp_v3_usb3_pcs_tbl), - .clk_list = qmp_v3_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v3_usb3phy_regs_layout, - - 
.has_pwrdn_delay = true, -}; -static const struct qmp_phy_cfg sdm845_dpphy_cfg = { - .type = PHY_TYPE_DP, - - .serdes_tbl = qmp_v3_dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), - .tx_tbl = qmp_v3_dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), + .dp_serdes_tbl = qmp_v3_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl), + .dp_tx_tbl = qmp_v3_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v3_dp_tx_tbl), .serdes_tbl_rbr = qmp_v3_dp_serdes_tbl_rbr, .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr), @@ -1110,16 +1076,19 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = { .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, -}; -static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = { - .usb_cfg = &sdm845_usb3phy_cfg, - .dp_cfg = &sdm845_dpphy_cfg, -}; + .clk_list = qmp_v3_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, -static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { - .type = PHY_TYPE_USB3, + .has_pwrdn_delay = true, +}; +static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = { .serdes_tbl = sm8150_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sm8150_usb3_serdes_tbl), .tx_tbl = sm8150_usb3_tx_tbl, @@ -1130,20 +1099,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8150_usb3_pcs_tbl), .pcs_usb_tbl = sm8150_usb3_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8150_usb3_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v4_usb3phy_regs_layout, - .pcs_usb_offset = 0x300, - - .has_pwrdn_delay = true, -}; - -static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { - .type = PHY_TYPE_DP, .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), @@ -1168,24 +1123,7 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = { .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, -}; - -static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = { - .usb_cfg = &sm8150_usb3phy_cfg, - .dp_cfg = &sc8180x_dpphy_cfg, -}; - -static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { - .type = PHY_TYPE_USB3, - .serdes_tbl = sc8280xp_usb43dp_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl), - .tx_tbl = sc8280xp_usb43dp_tx_tbl, - .tx_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_tx_tbl), - .rx_tbl = sc8280xp_usb43dp_rx_tbl, - .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_rx_tbl), - .pcs_tbl = sc8280xp_usb43dp_pcs_tbl, - .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_pcs_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, @@ -1194,10 +1132,19 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = { .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, .pcs_usb_offset = 0x300, + + .has_pwrdn_delay = true, }; -static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { - .type = PHY_TYPE_DP, +static const struct qmp_phy_cfg 
sc8280xp_usb43dpphy_cfg = { + .serdes_tbl = sc8280xp_usb43dp_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl), + .tx_tbl = sc8280xp_usb43dp_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_tx_tbl), + .rx_tbl = sc8280xp_usb43dp_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_rx_tbl), + .pcs_tbl = sc8280xp_usb43dp_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_pcs_tbl), .dp_serdes_tbl = qmp_v5_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v5_dp_serdes_tbl), @@ -1222,16 +1169,18 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = { .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, -}; -static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = { - .usb_cfg = &sc8280xp_usb43dp_usb_cfg, - .dp_cfg = &sc8280xp_usb43dp_dp_cfg, + .clk_list = qmp_v4_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v4_usb3phy_regs_layout, + .pcs_usb_offset = 0x300, }; -static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { - .type = PHY_TYPE_USB3, - +static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = { .serdes_tbl = sm8150_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sm8150_usb3_serdes_tbl), .tx_tbl = sm8250_usb3_tx_tbl, @@ -1242,20 +1191,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8250_usb3_pcs_tbl), .pcs_usb_tbl = sm8250_usb3_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8250_usb3_pcs_usb_tbl), - .clk_list = qmp_v4_sm8250_usbphy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), - .reset_list = msm8996_usb3phy_reset_l, - .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), - .vreg_list = qmp_phy_vreg_l, - .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v4_usb3phy_regs_layout, - .pcs_usb_offset = 0x300, - - .has_pwrdn_delay = true, -}; - -static const struct qmp_phy_cfg sm8250_dpphy_cfg = { - .type = PHY_TYPE_DP, .dp_serdes_tbl = qmp_v4_dp_serdes_tbl, .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl), @@ -1280,11 +1215,17 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, -}; -static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = { - .usb_cfg = &sm8250_usb3phy_cfg, - .dp_cfg = &sm8250_dpphy_cfg, + .clk_list = qmp_v4_sm8250_usbphy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v4_usb3phy_regs_layout, + .pcs_usb_offset = 0x300, + + .has_pwrdn_delay = true, }; static void qmp_combo_configure_lane(void __iomem *base, @@ -2697,10 +2638,7 @@ static int qmp_combo_probe(struct platform_device *pdev) void __iomem *serdes; void __iomem *usb_serdes; void __iomem *dp_serdes = NULL; - const struct qmp_phy_combo_cfg *combo_cfg = NULL; const struct qmp_phy_cfg *cfg = NULL; - const struct qmp_phy_cfg *usb_cfg = NULL; - const struct qmp_phy_cfg *dp_cfg = NULL; int num, id, expected_phys; int ret; @@ -2711,13 +2649,10 @@ static int qmp_combo_probe(struct platform_device *pdev) qmp->dev = dev; dev_set_drvdata(dev, qmp); - combo_cfg = of_device_get_match_data(dev); - if 
(!combo_cfg) + cfg = of_device_get_match_data(dev); + if (!cfg) return -EINVAL; - usb_cfg = combo_cfg->usb_cfg; - cfg = usb_cfg; /* Setup clks and regulators */ - usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(serdes)) return PTR_ERR(serdes); @@ -2730,7 +2665,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (IS_ERR(dp_serdes)) return PTR_ERR(dp_serdes); - dp_cfg = combo_cfg->dp_cfg; expected_phys = 2; mutex_init(&qmp->phy_mutex); @@ -2769,7 +2703,6 @@ static int qmp_combo_probe(struct platform_device *pdev) id = 0; for_each_available_child_of_node(dev->of_node, child) { if (of_node_name_eq(child, "dp-phy")) { - cfg = dp_cfg; serdes = dp_serdes; /* Create per-lane phy */ @@ -2787,7 +2720,6 @@ static int qmp_combo_probe(struct platform_device *pdev) goto err_node_put; } } else if (of_node_name_eq(child, "usb3-phy")) { - cfg = usb_cfg; serdes = usb_serdes; /* Create per-lane phy */ -- GitLab From dd1153651b0383ee9597609bc449d1751eefdcae Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:18 +0100 Subject: [PATCH 413/694] phy: qcom-qmp-combo: merge driver data The QMP combo driver manages a single PHY (even if it provides two interfaces for USB and DP, respectively) so merge the old qcom_qmp and qmp_phy structures and drop the PHY array. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-20-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 688 ++++++++++------------ 1 file changed, 312 insertions(+), 376 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 298477259ee62..707dd68ba9935 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -796,7 +796,7 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = { { 0x3f, 0xff, 0xff, 0xff } }; -struct qmp_phy; +struct qmp_combo; struct qmp_phy_cfg { /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ @@ -833,10 +833,10 @@ struct qmp_phy_cfg { const u8 (*pre_emphasis_hbr3_hbr2)[4][4]; /* DP PHY callbacks */ - int (*configure_dp_phy)(struct qmp_phy *qphy); - void (*configure_dp_tx)(struct qmp_phy *qphy); - int (*calibrate_dp_phy)(struct qmp_phy *qphy); - void (*dp_aux_init)(struct qmp_phy *qphy); + int (*configure_dp_phy)(struct qmp_combo *qmp); + void (*configure_dp_tx)(struct qmp_combo *qmp); + int (*calibrate_dp_phy)(struct qmp_combo *qmp); + void (*dp_aux_init)(struct qmp_combo *qmp); /* clock ids to be requested */ const char * const *clk_list; @@ -859,29 +859,19 @@ struct qmp_phy_cfg { }; -/** - * struct qmp_phy - per-lane phy descriptor - * - * @phy: generic phy - * @cfg: phy specific configuration - * @serdes: iomapped memory space for phy's serdes (i.e. 
PLL) - * @tx: iomapped memory space for lane's tx - * @rx: iomapped memory space for lane's rx - * @pcs: iomapped memory space for lane's pcs - * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs) - * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs) - * @pcs_misc: iomapped memory space for lane's pcs_misc - * @pcs_usb: iomapped memory space for lane's pcs_usb - * @pipe_clk: pipe clock - * @qmp: QMP phy to which this lane belongs - * @mode: current PHY mode - * @dp_aux_cfg: Display port aux config - * @dp_opts: Display port optional config - * @dp_clks: Display port clocks - */ -struct qmp_phy { - struct phy *phy; +struct qmp_phy_dp_clks { + struct qmp_combo *qmp; + struct clk_hw dp_link_hw; + struct clk_hw dp_pixel_hw; +}; + +struct qmp_combo { + struct device *dev; + const struct qmp_phy_cfg *cfg; + + void __iomem *dp_com; + void __iomem *serdes; void __iomem *tx; void __iomem *rx; @@ -897,59 +887,33 @@ struct qmp_phy { void __iomem *dp_pcs; struct clk *pipe_clk; - struct qcom_qmp *qmp; - enum phy_mode mode; - unsigned int dp_aux_cfg; - struct phy_configure_opts_dp dp_opts; - struct qmp_phy_dp_clks *dp_clks; -}; - -struct qmp_phy_dp_clks { - struct qmp_phy *qphy; - struct clk_hw dp_link_hw; - struct clk_hw dp_pixel_hw; -}; - -/** - * struct qcom_qmp - structure holding QMP phy block attributes - * - * @dev: device - * @dp_com: iomapped memory space for phy's dp_com control block - * - * @clks: array of clocks required by phy - * @resets: array of resets required by phy - * @vregs: regulator supplies bulk data - * - * @phys: array of per-lane phy descriptors - * @phy_mutex: mutex lock for PHY common block initialization - * @init_count: phy common block initialization count - */ -struct qcom_qmp { - struct device *dev; - void __iomem *dp_com; - struct clk_bulk_data *clks; struct reset_control_bulk_data *resets; struct regulator_bulk_data *vregs; - struct qmp_phy **phys; - struct qmp_phy *usb_phy; - struct mutex phy_mutex; int init_count; + + struct phy *usb_phy; + enum phy_mode mode; + + struct phy *dp_phy; + unsigned int dp_aux_cfg; + struct phy_configure_opts_dp dp_opts; + struct qmp_phy_dp_clks *dp_clks; }; -static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy); -static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy); -static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy); -static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy); +static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp); +static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp); +static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp); +static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp); -static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy); -static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy); -static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy); -static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy); +static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp); +static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp); +static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp); +static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp); -static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy); +static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp); static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) { @@ -1254,11 +1218,11 @@ static void qmp_combo_configure(void __iomem *base, 
qmp_combo_configure_lane(base, tbl, num, 0xff); } -static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) +static int qmp_combo_dp_serdes_init(struct qmp_combo *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->dp_serdes; - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->dp_serdes; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; qmp_combo_configure(serdes, cfg->dp_serdes_tbl, cfg->dp_serdes_tbl_num); @@ -1287,54 +1251,54 @@ static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy) return 0; } -static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy) +static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL, - qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); + qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_LANE_0_1_PWRDN | DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL | QSERDES_V3_COM_CLKBUF_RX_DRIVE_L, - qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - - writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0x24, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xbb, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); - qphy->dp_aux_cfg = 0; + qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); + + writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0x24, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); + writel(0xbb, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); + qmp->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qphy->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); + qmp->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); } -static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy, +static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp, unsigned int drv_lvl_reg, unsigned int emp_post_reg) { - const struct phy_configure_opts_dp *dp_opts = 
&qphy->dp_opts; - const struct qmp_phy_cfg *cfg = qphy->cfg; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; + const struct qmp_phy_cfg *cfg = qmp->cfg; unsigned int v_level = 0, p_level = 0; u8 voltage_swing_cfg, pre_emphasis_cfg; int i; @@ -1360,20 +1324,20 @@ static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy, voltage_swing_cfg |= DP_PHY_TXn_TX_DRV_LVL_MUX_EN; pre_emphasis_cfg |= DP_PHY_TXn_TX_EMP_POST1_LVL_MUX_EN; - writel(voltage_swing_cfg, qphy->dp_tx + drv_lvl_reg); - writel(pre_emphasis_cfg, qphy->dp_tx + emp_post_reg); - writel(voltage_swing_cfg, qphy->dp_tx2 + drv_lvl_reg); - writel(pre_emphasis_cfg, qphy->dp_tx2 + emp_post_reg); + writel(voltage_swing_cfg, qmp->dp_tx + drv_lvl_reg); + writel(pre_emphasis_cfg, qmp->dp_tx + emp_post_reg); + writel(voltage_swing_cfg, qmp->dp_tx2 + drv_lvl_reg); + writel(pre_emphasis_cfg, qmp->dp_tx2 + emp_post_reg); return 0; } -static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy) +static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp) { - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias_en, drvr_en; - if (qmp_combo_configure_dp_swing(qphy, QSERDES_V3_TX_TX_DRV_LVL, + if (qmp_combo_configure_dp_swing(qmp, QSERDES_V3_TX_TX_DRV_LVL, QSERDES_V3_TX_TX_EMP_POST1_LVL) < 0) return; @@ -1385,13 +1349,13 @@ static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy) drvr_en = 0x10; } - writel(drvr_en, qphy->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN); - writel(bias_en, qphy->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); - writel(drvr_en, qphy->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN); - writel(bias_en, qphy->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); + writel(drvr_en, qmp->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN); + writel(bias_en, qmp->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); + writel(drvr_en, qmp->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN); + writel(bias_en, qmp->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN); } -static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy) +static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) { u32 val; bool reverse = false; @@ -1411,27 +1375,27 @@ static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy) * if (lane_cnt == 4 || orientation == ORIENTATION_CC1) * val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; * if (orientation == ORIENTATION_CC2) - * writel(0x4c, qphy->dp_pcs + QSERDES_V3_DP_PHY_MODE); + * writel(0x4c, qmp->dp_pcs + QSERDES_V3_DP_PHY_MODE); */ val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; - writel(val, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(val, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); - writel(0x5c, qphy->dp_pcs + QSERDES_DP_PHY_MODE); + writel(0x5c, qmp->dp_pcs + QSERDES_DP_PHY_MODE); return reverse; } -static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) +static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp) { - const struct qmp_phy_dp_clks *dp_clks = qphy->dp_clks; - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div, status; unsigned long pixel_freq; - qmp_combo_configure_dp_mode(qphy); + qmp_combo_configure_dp_mode(qmp); - writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { 
case 1620: @@ -1454,40 +1418,40 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qphy->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); - writel(0x04, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x20, qphy->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL); + writel(0x20, qmp->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL); - if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V3_COM_C_READY_STATUS, + if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V3_COM_C_READY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - return readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS, + return readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1498,76 +1462,76 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy) * We need to calibrate the aux setting here as many times * as the caller tries */ -static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy) +static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp) { static const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d }; u8 val; - qphy->dp_aux_cfg++; - qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); - val = cfg1_settings[qphy->dp_aux_cfg]; + qmp->dp_aux_cfg++; + qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); + val = cfg1_settings[qmp->dp_aux_cfg]; - writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); return 0; } -static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy) +static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ - writel(0x17, qphy->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); - - writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qphy->dp_pcs + 
QSERDES_DP_PHY_AUX_CFG7); - writel(0xb7, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); - qphy->dp_aux_cfg = 0; + writel(0x17, qmp->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); + + writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); + writel(0xb7, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); + qmp->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qphy->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); + qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); } -static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy) +static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp) { /* Program default values before writing proper values */ - writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); - qmp_combo_configure_dp_swing(qphy, QSERDES_V4_TX_TX_DRV_LVL, + qmp_combo_configure_dp_swing(qmp, QSERDES_V4_TX_TX_DRV_LVL, QSERDES_V4_TX_TX_EMP_POST1_LVL); } -static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) +static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp) { - const struct qmp_phy_dp_clks *dp_clks = qphy->dp_clks; - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div, status; unsigned long pixel_freq; - writel(0x0f, qphy->dp_pcs + QSERDES_V4_DP_PHY_CFG_1); + writel(0x0f, qmp->dp_pcs + QSERDES_V4_DP_PHY_CFG_1); - qmp_combo_configure_dp_mode(qphy); + qmp_combo_configure_dp_mode(qmp); - writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1590,49 +1554,49 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qphy->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); - writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qphy->dp_pcs + 
QSERDES_DP_PHY_CFG); - writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x20, qphy->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL); + writel(0x20, qmp->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL); - if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_C_READY_STATUS, + if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_C_READY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS, + if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_CMN_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS, + if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_CMN_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1642,15 +1606,15 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy) return 0; } -static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy) +static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp) { - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias0_en, drvr0_en, bias1_en, drvr1_en; bool reverse = false; u32 status; int ret; - ret = qcom_qmp_v45_phy_configure_dp_phy(qphy); + ret = qcom_qmp_v45_phy_configure_dp_phy(qmp); if (ret < 0) return ret; @@ -1676,43 +1640,43 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy) drvr1_en = 0x10; } - writel(drvr0_en, qphy->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN); - writel(bias0_en, qphy->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(drvr1_en, qphy->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); - writel(bias1_en, qphy->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); + writel(drvr0_en, qmp->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN); + writel(bias0_en, qmp->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); + writel(drvr1_en, qmp->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); + writel(bias1_en, qmp->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x0a, qphy->dp_tx + QSERDES_V4_TX_TX_POL_INV); - writel(0x0a, qphy->dp_tx2 + QSERDES_V4_TX_TX_POL_INV); + writel(0x0a, qmp->dp_tx + QSERDES_V4_TX_TX_POL_INV); + writel(0x0a, qmp->dp_tx2 + QSERDES_V4_TX_TX_POL_INV); - writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); - writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx2 + 
QSERDES_V4_TX_TX_DRV_LVL); - writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL); return 0; } -static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy) +static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp) { - const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts; + const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias0_en, drvr0_en, bias1_en, drvr1_en; bool reverse = false; u32 status; int ret; - ret = qcom_qmp_v45_phy_configure_dp_phy(qphy); + ret = qcom_qmp_v45_phy_configure_dp_phy(qmp); if (ret < 0) return ret; @@ -1733,30 +1697,30 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy) drvr1_en = 0x10; } - writel(drvr0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); - writel(bias0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(drvr1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); - writel(bias1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); + writel(drvr0_en, qmp->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); + writel(bias0_en, qmp->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); + writel(drvr1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); + writel(bias1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x0a, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV); - writel(0x0a, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV); + writel(0x0a, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV); + writel(0x0a, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV); - writel(0x27, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL); - writel(0x27, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL); + writel(0x27, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL); - writel(0x20, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); - writel(0x20, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); + writel(0x20, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL); return 0; } @@ -1765,16 +1729,16 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy) * We need to calibrate the aux setting here as many times * as the caller tries */ -static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy) +static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp) { static const u8 cfg1_settings[] = { 0x20, 0x13, 0x23, 0x1d }; u8 val; - qphy->dp_aux_cfg++; - qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); - val = cfg1_settings[qphy->dp_aux_cfg]; + qmp->dp_aux_cfg++; + qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); + val = cfg1_settings[qmp->dp_aux_cfg]; - writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1782,13 +1746,13 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy) static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opts) { const struct phy_configure_opts_dp *dp_opts = &opts->dp; - struct 
qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; - memcpy(&qphy->dp_opts, dp_opts, sizeof(*dp_opts)); - if (qphy->dp_opts.set_voltages) { - cfg->configure_dp_tx(qphy); - qphy->dp_opts.set_voltages = 0; + memcpy(&qmp->dp_opts, dp_opts, sizeof(*dp_opts)); + if (qmp->dp_opts.set_voltages) { + cfg->configure_dp_tx(qmp); + qmp->dp_opts.set_voltages = 0; } return 0; @@ -1796,20 +1760,18 @@ static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opt static int qmp_combo_dp_calibrate(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; if (cfg->calibrate_dp_phy) - return cfg->calibrate_dp_phy(qphy); + return cfg->calibrate_dp_phy(qmp); return 0; } -static int qmp_combo_com_init(struct qmp_phy *qphy) +static int qmp_combo_com_init(struct qmp_combo *qmp) { - struct qcom_qmp *qmp = qphy->qmp; - struct qmp_phy *usb_phy = qmp->usb_phy; - const struct qmp_phy_cfg *cfg = usb_phy->cfg; + const struct qmp_phy_cfg *cfg = qmp->cfg; void __iomem *dp_com = qmp->dp_com; int ret; @@ -1861,7 +1823,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - qphy_setbits(usb_phy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_setbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); mutex_unlock(&qmp->phy_mutex); @@ -1878,11 +1840,9 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) return ret; } -static int qmp_combo_com_exit(struct qmp_phy *qphy) +static int qmp_combo_com_exit(struct qmp_combo *qmp) { - struct qcom_qmp *qmp = qphy->qmp; - struct qmp_phy *usb_phy = qmp->usb_phy; - const struct qmp_phy_cfg *cfg = usb_phy->cfg; + const struct qmp_phy_cfg *cfg = qmp->cfg; mutex_lock(&qmp->phy_mutex); if (--qmp->init_count) { @@ -1903,77 +1863,76 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy) static int qmp_combo_dp_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret; - ret = qmp_combo_com_init(qphy); + ret = qmp_combo_com_init(qmp); if (ret) return ret; - cfg->dp_aux_init(qphy); + cfg->dp_aux_init(qmp); return 0; } static int qmp_combo_dp_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); - qmp_combo_com_exit(qphy); + qmp_combo_com_exit(qmp); return 0; } static int qmp_combo_dp_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *tx = qphy->dp_tx; - void __iomem *tx2 = qphy->dp_tx2; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *tx = qmp->dp_tx; + void __iomem *tx2 = qmp->dp_tx2; - qmp_combo_dp_serdes_init(qphy); + qmp_combo_dp_serdes_init(qmp); qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1); qmp_combo_configure_lane(tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2); /* Configure special DP tx tunings */ - cfg->configure_dp_tx(qphy); + cfg->configure_dp_tx(qmp); /* Configure link rate, swing, etc. 
*/ - cfg->configure_dp_phy(qphy); + cfg->configure_dp_phy(qmp); return 0; } static int qmp_combo_dp_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); /* Assert DP PHY power down */ - writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); return 0; } static int qmp_combo_usb_power_on(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - struct qcom_qmp *qmp = qphy->qmp; - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *serdes = qphy->serdes; - void __iomem *tx = qphy->tx; - void __iomem *rx = qphy->rx; - void __iomem *tx2 = qphy->tx2; - void __iomem *rx2 = qphy->rx2; - void __iomem *pcs = qphy->pcs; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *serdes = qmp->serdes; + void __iomem *tx = qmp->tx; + void __iomem *rx = qmp->rx; + void __iomem *tx2 = qmp->tx2; + void __iomem *rx2 = qmp->rx2; + void __iomem *pcs = qmp->pcs; void __iomem *status; unsigned int val; int ret; qmp_combo_configure(serdes, cfg->serdes_tbl, cfg->serdes_tbl_num); - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret); return ret; @@ -2008,27 +1967,27 @@ static int qmp_combo_usb_power_on(struct phy *phy) return 0; err_disable_pipe_clk: - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); return ret; } static int qmp_combo_usb_power_off(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = phy_get_drvdata(phy); + const struct qmp_phy_cfg *cfg = qmp->cfg; - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); /* PHY reset */ - qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); + qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET); /* stop SerDes and Phy-Coding-Sublayer */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START); /* Put PHY into POWER DOWN state: active low */ - qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); return 0; @@ -2036,37 +1995,37 @@ static int qmp_combo_usb_power_off(struct phy *phy) static int qmp_combo_usb_init(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); int ret; - ret = qmp_combo_com_init(qphy); + ret = qmp_combo_com_init(qmp); if (ret) return ret; ret = qmp_combo_usb_power_on(phy); if (ret) - qmp_combo_com_exit(qphy); + qmp_combo_com_exit(qmp); return ret; } static int qmp_combo_usb_exit(struct phy *phy) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); int ret; ret = qmp_combo_usb_power_off(phy); if (ret) return ret; - return qmp_combo_com_exit(qphy); + return qmp_combo_com_exit(qmp); } static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode) { - struct qmp_phy *qphy = phy_get_drvdata(phy); + struct qmp_combo *qmp = phy_get_drvdata(phy); - qphy->mode = mode; + qmp->mode = mode; return 0; } @@ -2088,15 +2047,15 @@ static const struct phy_ops qmp_combo_dp_phy_ops = { .owner = THIS_MODULE, }; -static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy) +static void 
qmp_combo_enable_autonomous_mode(struct qmp_combo *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; u32 intr_mask; - if (qphy->mode == PHY_MODE_USB_HOST_SS || - qphy->mode == PHY_MODE_USB_DEVICE_SS) + if (qmp->mode == PHY_MODE_USB_HOST_SS || + qmp->mode == PHY_MODE_USB_DEVICE_SS) intr_mask = ARCVR_DTCT_EN | ALFPS_DTCT_EN; else intr_mask = ARCVR_DTCT_EN | ARCVR_DTCT_EVENT_SEL; @@ -2117,11 +2076,11 @@ static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy) qphy_clrbits(pcs_misc, QPHY_V3_PCS_MISC_CLAMP_ENABLE, CLAMP_EN); } -static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy) +static void qmp_combo_disable_autonomous_mode(struct qmp_combo *qmp) { - const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; - void __iomem *pcs_misc = qphy->pcs_misc; + const struct qmp_phy_cfg *cfg = qmp->cfg; + void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs; + void __iomem *pcs_misc = qmp->pcs_misc; /* Disable i/o clamp_n on resume for normal mode */ if (pcs_misc) @@ -2137,20 +2096,19 @@ static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy) static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->usb_phy; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; - dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode); + dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qmp->mode); if (!qmp->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); return 0; } - qmp_combo_enable_autonomous_mode(qphy); + qmp_combo_enable_autonomous_mode(qmp); - clk_disable_unprepare(qphy->pipe_clk); + clk_disable_unprepare(qmp->pipe_clk); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); return 0; @@ -2158,12 +2116,11 @@ static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev) static int __maybe_unused qmp_combo_runtime_resume(struct device *dev) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); - struct qmp_phy *qphy = qmp->usb_phy; - const struct qmp_phy_cfg *cfg = qphy->cfg; + struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; int ret = 0; - dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode); + dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qmp->mode); if (!qmp->init_count) { dev_vdbg(dev, "PHY not initialized, bailing out\n"); @@ -2174,14 +2131,14 @@ static int __maybe_unused qmp_combo_runtime_resume(struct device *dev) if (ret) return ret; - ret = clk_prepare_enable(qphy->pipe_clk); + ret = clk_prepare_enable(qmp->pipe_clk); if (ret) { dev_err(dev, "pipe_clk enable failed, err=%d\n", ret); clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks); return ret; } - qmp_combo_disable_autonomous_mode(qphy); + qmp_combo_disable_autonomous_mode(qmp); return 0; } @@ -2193,7 +2150,7 @@ static const struct dev_pm_ops qmp_combo_pm_ops = { static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); int num = cfg->num_vregs; int ret, i; @@ -2225,7 +2182,7 @@ static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg static int 
qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); int i; int ret; @@ -2246,7 +2203,7 @@ static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cf static int qmp_combo_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); int num = cfg->num_clks; int i; @@ -2283,7 +2240,7 @@ static void phy_clk_release_provider(void *res) * clk | +-------+ | +-----+ * +---------------+ */ -static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np) +static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) { struct clk_fixed_rate *fixed; struct clk_init_data init = { }; @@ -2386,12 +2343,12 @@ static unsigned long qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { const struct qmp_phy_dp_clks *dp_clks; - const struct qmp_phy *qphy; + const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_pixel_hw); - qphy = dp_clks->qphy; - dp_opts = &qphy->dp_opts; + qmp = dp_clks->qmp; + dp_opts = &qmp->dp_opts; switch (dp_opts->link_rate) { case 1620: @@ -2430,12 +2387,12 @@ static unsigned long qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { const struct qmp_phy_dp_clks *dp_clks; - const struct qmp_phy *qphy; + const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_link_hw); - qphy = dp_clks->qphy; - dp_opts = &qphy->dp_opts; + qmp = dp_clks->qmp; + dp_opts = &qmp->dp_opts; switch (dp_opts->link_rate) { case 1620: @@ -2470,8 +2427,7 @@ qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) return &dp_clks->dp_pixel_hw; } -static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, - struct device_node *np) +static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) { struct clk_init_data init = { }; struct qmp_phy_dp_clks *dp_clks; @@ -2482,8 +2438,8 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, if (!dp_clks) return -ENOMEM; - dp_clks->qphy = qphy; - qphy->dp_clks = dp_clks; + dp_clks->qmp = qmp; + qmp->dp_clks = dp_clks; snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev)); init.ops = &qcom_qmp_dp_link_clk_ops; @@ -2512,20 +2468,15 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy, return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int id, +static int qmp_combo_create_dp(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; int ret; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; - - qphy->cfg = cfg; - qphy->dp_serdes = serdes; + qmp->cfg = cfg; + qmp->dp_serdes = serdes; /* * Get memory resources from the DP child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; @@ -2533,17 +2484,17 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i * * Note that only tx/tx2 and pcs are used by the DP implementation. 
*/ - qphy->dp_tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->dp_tx)) - return PTR_ERR(qphy->dp_tx); + qmp->dp_tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->dp_tx)) + return PTR_ERR(qmp->dp_tx); - qphy->dp_pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->dp_pcs)) - return PTR_ERR(qphy->dp_pcs); + qmp->dp_pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->dp_pcs)) + return PTR_ERR(qmp->dp_pcs); - qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->dp_tx2)) - return PTR_ERR(qphy->dp_tx2); + qmp->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->dp_tx2)) + return PTR_ERR(qmp->dp_tx2); generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); if (IS_ERR(generic_phy)) { @@ -2552,66 +2503,59 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->dp_phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } -static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int id, +static int qmp_combo_create_usb(struct device *dev, struct device_node *np, void __iomem *serdes, const struct qmp_phy_cfg *cfg) { - struct qcom_qmp *qmp = dev_get_drvdata(dev); + struct qmp_combo *qmp = dev_get_drvdata(dev); struct phy *generic_phy; - struct qmp_phy *qphy; int ret; - qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); - if (!qphy) - return -ENOMEM; - - qphy->cfg = cfg; - qphy->serdes = serdes; + qmp->cfg = cfg; + qmp->serdes = serdes; /* * Get memory resources from the USB child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; * tx2 -> 3; rx2 -> 4; pcs_misc (optional) -> 5 */ - qphy->tx = devm_of_iomap(dev, np, 0, NULL); - if (IS_ERR(qphy->tx)) - return PTR_ERR(qphy->tx); + qmp->tx = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(qmp->tx)) + return PTR_ERR(qmp->tx); - qphy->rx = devm_of_iomap(dev, np, 1, NULL); - if (IS_ERR(qphy->rx)) - return PTR_ERR(qphy->rx); + qmp->rx = devm_of_iomap(dev, np, 1, NULL); + if (IS_ERR(qmp->rx)) + return PTR_ERR(qmp->rx); - qphy->pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qphy->pcs)) - return PTR_ERR(qphy->pcs); + qmp->pcs = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->pcs)) + return PTR_ERR(qmp->pcs); if (cfg->pcs_usb_offset) - qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset; + qmp->pcs_usb = qmp->pcs + cfg->pcs_usb_offset; - qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); - if (IS_ERR(qphy->tx2)) - return PTR_ERR(qphy->tx2); + qmp->tx2 = devm_of_iomap(dev, np, 3, NULL); + if (IS_ERR(qmp->tx2)) + return PTR_ERR(qmp->tx2); - qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); - if (IS_ERR(qphy->rx2)) - return PTR_ERR(qphy->rx2); + qmp->rx2 = devm_of_iomap(dev, np, 4, NULL); + if (IS_ERR(qmp->rx2)) + return PTR_ERR(qmp->rx2); - qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); - if (IS_ERR(qphy->pcs_misc)) { + qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + if (IS_ERR(qmp->pcs_misc)) { dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); - qphy->pcs_misc = NULL; + qmp->pcs_misc = NULL; } - qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL); - if (IS_ERR(qphy->pipe_clk)) { - return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk), - "failed to get lane%d pipe_clk\n", id); + qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get pipe clock\n"); } generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops); @@ -2621,17 
+2565,15 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int return ret; } - qphy->phy = generic_phy; - qphy->qmp = qmp; - qmp->phys[id] = qphy; - phy_set_drvdata(generic_phy, qphy); + qmp->usb_phy = generic_phy; + phy_set_drvdata(generic_phy, qmp); return 0; } static int qmp_combo_probe(struct platform_device *pdev) { - struct qcom_qmp *qmp; + struct qmp_combo *qmp; struct device *dev = &pdev->dev; struct device_node *child; struct phy_provider *phy_provider; @@ -2686,10 +2628,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (num > expected_phys) return -EINVAL; - qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL); - if (!qmp->phys) - return -ENOMEM; - pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) @@ -2706,14 +2644,14 @@ static int qmp_combo_probe(struct platform_device *pdev) serdes = dp_serdes; /* Create per-lane phy */ - ret = qmp_combo_create_dp(dev, child, id, serdes, cfg); + ret = qmp_combo_create_dp(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); goto err_node_put; } - ret = phy_dp_clks_register(qmp, qmp->phys[id], child); + ret = phy_dp_clks_register(qmp, child); if (ret) { dev_err(qmp->dev, "failed to register DP clock source\n"); @@ -2723,15 +2661,13 @@ static int qmp_combo_probe(struct platform_device *pdev) serdes = usb_serdes; /* Create per-lane phy */ - ret = qmp_combo_create_usb(dev, child, id, serdes, cfg); + ret = qmp_combo_create_usb(dev, child, serdes, cfg); if (ret) { dev_err(dev, "failed to create lane%d phy, %d\n", id, ret); goto err_node_put; } - qmp->usb_phy = qmp->phys[id]; - /* * Register the pipe clock provided by phy. * See function description to see details of this pipe clock. -- GitLab From 6c7c449a008b7a279e15254a829d096a7ea72ee3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:19 +0100 Subject: [PATCH 414/694] phy: qcom-qmp-combo: clean up device-tree parsing Since the QMP driver split there will be precisely two child nodes so drop the obsolete iteration construct. While at it, drop the verbose error logging that would have been printed also on probe deferrals. Note that there is no need to check if there are additional child nodes (the kernel is not a devicetree validator), but let's return an error if either child node is missing. 
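In code terms, the child-node handling this patch switches to boils down to the
condensed sketch below. The helper name is invented here purely for illustration;
of_get_child_by_name() and of_node_put() are the calls actually used in the probe
hunks that follow:

	#include <linux/device.h>
	#include <linux/errno.h>
	#include <linux/of.h>

	/* Illustrative sketch only: probe() now looks up the two fixed children. */
	static int qmp_combo_find_children(struct device *dev,
					   struct device_node **usb_np,
					   struct device_node **dp_np)
	{
		*usb_np = of_get_child_by_name(dev->of_node, "usb3-phy");
		if (!*usb_np)
			return -EINVAL;

		*dp_np = of_get_child_by_name(dev->of_node, "dp-phy");
		if (!*dp_np) {
			of_node_put(*usb_np);
			return -EINVAL;
		}

		/* Caller creates the USB3 and DP PHYs and then puts both nodes. */
		return 0;
	}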
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-21-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 82 ++++++++--------------- 1 file changed, 28 insertions(+), 54 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 707dd68ba9935..9eacbd224012f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2575,13 +2575,12 @@ static int qmp_combo_probe(struct platform_device *pdev) { struct qmp_combo *qmp; struct device *dev = &pdev->dev; - struct device_node *child; + struct device_node *dp_np, *usb_np; struct phy_provider *phy_provider; void __iomem *serdes; void __iomem *usb_serdes; void __iomem *dp_serdes = NULL; const struct qmp_phy_cfg *cfg = NULL; - int num, id, expected_phys; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -2607,8 +2606,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (IS_ERR(dp_serdes)) return PTR_ERR(dp_serdes); - expected_phys = 2; - mutex_init(&qmp->phy_mutex); ret = qmp_combo_clk_init(dev, cfg); @@ -2623,75 +2620,52 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) return ret; - num = of_get_available_child_count(dev->of_node); - /* do we have a rogue child node ? */ - if (num > expected_phys) + usb_np = of_get_child_by_name(dev->of_node, "usb3-phy"); + if (!usb_np) + return -EINVAL; + + dp_np = of_get_child_by_name(dev->of_node, "dp-phy"); + if (!dp_np) { + of_node_put(usb_np); return -EINVAL; + } pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) - return ret; + goto err_node_put; /* * Prevent runtime pm from being ON by default. Users can enable * it using power/control in sysfs. */ pm_runtime_forbid(dev); - id = 0; - for_each_available_child_of_node(dev->of_node, child) { - if (of_node_name_eq(child, "dp-phy")) { - serdes = dp_serdes; - - /* Create per-lane phy */ - ret = qmp_combo_create_dp(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } - - ret = phy_dp_clks_register(qmp, child); - if (ret) { - dev_err(qmp->dev, - "failed to register DP clock source\n"); - goto err_node_put; - } - } else if (of_node_name_eq(child, "usb3-phy")) { - serdes = usb_serdes; - - /* Create per-lane phy */ - ret = qmp_combo_create_usb(dev, child, serdes, cfg); - if (ret) { - dev_err(dev, "failed to create lane%d phy, %d\n", - id, ret); - goto err_node_put; - } - - /* - * Register the pipe clock provided by phy. - * See function description to see details of this pipe clock. 
- */ - ret = phy_pipe_clk_register(qmp, child); - if (ret) { - dev_err(qmp->dev, - "failed to register pipe clock source\n"); - goto err_node_put; - } - } + ret = qmp_combo_create_usb(dev, usb_np, usb_serdes, cfg); + if (ret) + goto err_node_put; - id++; - } + ret = phy_pipe_clk_register(qmp, usb_np); + if (ret) + goto err_node_put; - if (!qmp->usb_phy) - return -EINVAL; + ret = qmp_combo_create_dp(dev, dp_np, dp_serdes, cfg); + if (ret) + goto err_node_put; + + ret = phy_dp_clks_register(qmp, dp_np); + if (ret) + goto err_node_put; phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + of_node_put(usb_np); + of_node_put(dp_np); + return PTR_ERR_OR_ZERO(phy_provider); err_node_put: - of_node_put(child); + of_node_put(usb_np); + of_node_put(dp_np); return ret; } -- GitLab From 44aff8e31080e13a24313120aae259c659b04cd1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:20 +0100 Subject: [PATCH 415/694] phy: qcom-qmp-combo: clean up probe initialisation Stop abusing the driver data pointer and instead pass the driver state structure directly to the initialisation helpers during probe. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-22-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 61 ++++++++++------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9eacbd224012f..5d92cbfc458ef 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2148,9 +2148,10 @@ static const struct dev_pm_ops qmp_combo_pm_ops = { qmp_combo_runtime_resume, NULL) }; -static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_combo_vreg_init(struct qmp_combo *qmp) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_vregs; int ret, i; @@ -2180,9 +2181,10 @@ static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg return 0; } -static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_combo_reset_init(struct qmp_combo *qmp) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int i; int ret; @@ -2201,9 +2203,10 @@ static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cf return 0; } -static int qmp_combo_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg) +static int qmp_combo_clk_init(struct qmp_combo *qmp) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; int num = cfg->num_clks; int i; @@ -2468,15 +2471,12 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_combo_create_dp(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + struct device *dev = qmp->dev; struct phy *generic_phy; int ret; - qmp->cfg = cfg; - qmp->dp_serdes = serdes; /* * Get memory resources from the DP child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; @@ -2509,15 
+2509,13 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, return 0; } -static int qmp_combo_create_usb(struct device *dev, struct device_node *np, - void __iomem *serdes, const struct qmp_phy_cfg *cfg) +static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np) { - struct qmp_combo *qmp = dev_get_drvdata(dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + struct device *dev = qmp->dev; struct phy *generic_phy; int ret; - qmp->cfg = cfg; - qmp->serdes = serdes; /* * Get memory resources from the USB child node: * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; @@ -2577,10 +2575,6 @@ static int qmp_combo_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *dp_np, *usb_np; struct phy_provider *phy_provider; - void __iomem *serdes; - void __iomem *usb_serdes; - void __iomem *dp_serdes = NULL; - const struct qmp_phy_cfg *cfg = NULL; int ret; qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL); @@ -2588,35 +2582,34 @@ static int qmp_combo_probe(struct platform_device *pdev) return -ENOMEM; qmp->dev = dev; - dev_set_drvdata(dev, qmp); - cfg = of_device_get_match_data(dev); - if (!cfg) + qmp->cfg = of_device_get_match_data(dev); + if (!qmp->cfg) return -EINVAL; - usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(serdes)) - return PTR_ERR(serdes); + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + return PTR_ERR(qmp->serdes); qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(qmp->dp_com)) return PTR_ERR(qmp->dp_com); - dp_serdes = devm_platform_ioremap_resource(pdev, 2); - if (IS_ERR(dp_serdes)) - return PTR_ERR(dp_serdes); + qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2); + if (IS_ERR(qmp->dp_serdes)) + return PTR_ERR(qmp->dp_serdes); mutex_init(&qmp->phy_mutex); - ret = qmp_combo_clk_init(dev, cfg); + ret = qmp_combo_clk_init(qmp); if (ret) return ret; - ret = qmp_combo_reset_init(dev, cfg); + ret = qmp_combo_reset_init(qmp); if (ret) return ret; - ret = qmp_combo_vreg_init(dev, cfg); + ret = qmp_combo_vreg_init(qmp); if (ret) return ret; @@ -2640,7 +2633,7 @@ static int qmp_combo_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_combo_create_usb(dev, usb_np, usb_serdes, cfg); + ret = qmp_combo_create_usb(qmp, usb_np); if (ret) goto err_node_put; @@ -2648,7 +2641,7 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) goto err_node_put; - ret = qmp_combo_create_dp(dev, dp_np, dp_serdes, cfg); + ret = qmp_combo_create_dp(qmp, dp_np); if (ret) goto err_node_put; -- GitLab From 526103b7a6759e4afd1bcdd4de619642689a78d2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 12:06:21 +0100 Subject: [PATCH 416/694] phy: qcom-qmp-combo: clean up DP callback names Clean up and unify the DP callbacks by dropping the redundant "qcom" and "phy" prefix and infix and by using a common naming scheme ("qmp" + version + callback name). 
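Spelled out for the v3 hooks (the v4, v45 and v5 hooks are renamed the same way
in the hunks below):

	qcom_qmp_v3_phy_dp_aux_init()        ->  qmp_v3_dp_aux_init()
	qcom_qmp_v3_phy_configure_dp_tx()    ->  qmp_v3_configure_dp_tx()
	qcom_qmp_v3_phy_configure_dp_phy()   ->  qmp_v3_configure_dp_phy()
	qcom_qmp_v3_dp_phy_calibrate()       ->  qmp_v3_calibrate_dp_phy()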
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114110621.4639-23-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 82 +++++++++++------------ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5d92cbfc458ef..c5d8f8bfaaaa1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -903,17 +903,17 @@ struct qmp_combo { struct qmp_phy_dp_clks *dp_clks; }; -static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp); -static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp); -static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp); -static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp); +static void qmp_v3_dp_aux_init(struct qmp_combo *qmp); +static void qmp_v3_configure_dp_tx(struct qmp_combo *qmp); +static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp); +static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp); -static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp); -static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp); -static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp); -static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp); +static void qmp_v4_dp_aux_init(struct qmp_combo *qmp); +static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp); +static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp); +static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp); -static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp); +static int qmp_v5_configure_dp_phy(struct qmp_combo *qmp); static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) { @@ -991,10 +991,10 @@ static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + .dp_aux_init = qmp_v3_dp_aux_init, + .configure_dp_tx = qmp_v3_configure_dp_tx, + .configure_dp_phy = qmp_v3_configure_dp_phy, + .calibrate_dp_phy = qmp_v3_calibrate_dp_phy, .clk_list = qmp_v3_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), @@ -1036,10 +1036,10 @@ static const struct qmp_phy_cfg sdm845_usb3dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v3_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate, + .dp_aux_init = qmp_v3_dp_aux_init, + .configure_dp_tx = qmp_v3_configure_dp_tx, + .configure_dp_phy = qmp_v3_configure_dp_phy, + .calibrate_dp_phy = qmp_v3_calibrate_dp_phy, .clk_list = qmp_v3_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), @@ -1083,10 +1083,10 @@ static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = 
qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v4_configure_dp_phy, + .calibrate_dp_phy = qmp_v4_calibrate_dp_phy, .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), @@ -1129,10 +1129,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v5_configure_dp_phy, + .calibrate_dp_phy = qmp_v4_calibrate_dp_phy, .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), @@ -1175,10 +1175,10 @@ static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = { .swing_hbr3_hbr2 = &qmp_dp_v3_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2, - .dp_aux_init = qcom_qmp_v4_phy_dp_aux_init, - .configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx, - .configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy, - .calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate, + .dp_aux_init = qmp_v4_dp_aux_init, + .configure_dp_tx = qmp_v4_configure_dp_tx, + .configure_dp_phy = qmp_v4_configure_dp_phy, + .calibrate_dp_phy = qmp_v4_calibrate_dp_phy, .clk_list = qmp_v4_sm8250_usbphy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), @@ -1251,7 +1251,7 @@ static int qmp_combo_dp_serdes_init(struct qmp_combo *qmp) return 0; } -static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp) +static void qmp_v3_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, @@ -1332,7 +1332,7 @@ static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp, return 0; } -static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp) +static void qmp_v3_configure_dp_tx(struct qmp_combo *qmp) { const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias_en, drvr_en; @@ -1385,7 +1385,7 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) return reverse; } -static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp) +static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) { const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; @@ -1462,7 +1462,7 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp) * We need to calibrate the aux setting here as many times * as the caller tries */ -static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp) +static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp) { static const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d }; u8 val; @@ -1476,7 +1476,7 @@ static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp) return 0; } -static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp) +static void qmp_v4_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, @@ -1503,7 +1503,7 @@ static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp) qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); } -static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp) +static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp) { /* Program default values before 
writing proper values */ writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL); @@ -1516,7 +1516,7 @@ static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp) QSERDES_V4_TX_TX_EMP_POST1_LVL); } -static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp) +static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) { const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; @@ -1606,7 +1606,7 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp) return 0; } -static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp) +static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp) { const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias0_en, drvr0_en, bias1_en, drvr1_en; @@ -1614,7 +1614,7 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp) u32 status; int ret; - ret = qcom_qmp_v45_phy_configure_dp_phy(qmp); + ret = qmp_v45_configure_dp_phy(qmp); if (ret < 0) return ret; @@ -1668,7 +1668,7 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp) return 0; } -static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp) +static int qmp_v5_configure_dp_phy(struct qmp_combo *qmp) { const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 bias0_en, drvr0_en, bias1_en, drvr1_en; @@ -1676,7 +1676,7 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp) u32 status; int ret; - ret = qcom_qmp_v45_phy_configure_dp_phy(qmp); + ret = qmp_v45_configure_dp_phy(qmp); if (ret < 0) return ret; @@ -1729,7 +1729,7 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp) * We need to calibrate the aux setting here as many times * as the caller tries */ -static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp) +static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp) { static const u8 cfg1_settings[] = { 0x20, 0x13, 0x23, 0x1d }; u8 val; -- GitLab From 32efdb0bb6e19965337fb63991237ecd99e0f9a4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:44 +0100 Subject: [PATCH 417/694] dt-bindings: phy: qcom,qmp-usb3-dp: rename current bindings The current QMP USB3-DP PHY bindings are based on the original MSM8996 binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP USB3-DP PHY block provides a single multi-protocol PHY and even if some resources are only used by either the USB or DP part of the device there is no real benefit in describing these resources in child nodes. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. they do not include the PCS_LANE registers). In preparation for adding new bindings for SC8280XP which further bindings can be based on, rename the current schema file after SC7180, which was the first supported platform, and add a reference to the SC8280XP bindings.
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- ...dp-phy.yaml => qcom,sc7180-qmp-usb3-dp-phy.yaml} | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) rename Documentation/devicetree/bindings/phy/{qcom,qmp-usb3-dp-phy.yaml => qcom,sc7180-qmp-usb3-dp-phy.yaml} (91%) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml similarity index 91% rename from Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml rename to Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml index 97a7ecafbf852..8afc5e815ae8e 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml @@ -2,10 +2,17 @@ %YAML 1.2 --- -$id: "http://devicetree.org/schemas/phy/qcom,qmp-usb3-dp-phy.yaml#" -$schema: "http://devicetree.org/meta-schemas/core.yaml#" +$id: http://devicetree.org/schemas/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm QMP USB3 DP PHY controller +title: Qualcomm QMP USB3 DP PHY controller (SC7180) + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS and USB. + + Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see + qcom,sc8280xp-qmp-usb43dp-phy.yaml. maintainers: - Wesley Cheng -- GitLab From e1c4c5436b4ad579762fbe78bfabc8aef59bd5b1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:45 +0100 Subject: [PATCH 418/694] dt-bindings: phy: qcom,qmp-usb3-dp: fix sc8280xp binding The current QMP USB3-DP PHY bindings are based on the original MSM8996 binding which provided multiple PHYs per IP block and these in turn were described by child nodes. The QMP USB3-DP PHY block provides a single multi-protocol PHY and even if some resources are only used by either the USB or DP part of the device there is no real benefit in describing these resources in child nodes. The original MSM8996 binding also ended up describing the individual register blocks as belonging to either the wrapper node or the PHY child nodes. This is an unnecessary level of detail which has led to problems when later IP blocks using different register layouts have been forced to fit the original mould rather than updating the binding. The bindings are arguably also incomplete as they only describe the register blocks used by the current Linux drivers (e.g. they do not include the PCS LANE registers). This is specifically true for later USB4-USB3-DP QMP PHYs where the TX registers are used by both the USB3 and DP parts of the PHY (and where the USB4 part of the PHY was not covered by the binding at all). Notably there are also no DP "RX" (sic) registers as described by the current bindings and the DP "PCS" region is really a set of DP_PHY registers. Add a new binding for the USB4-USB3-DP QMP PHYs found on SC8280XP which further bindings can be based on. Note that the binding uses a PHY index to access either the USB3 or DP part of the PHY and that this can later be used also for the USB4 part if needed. Similarly, the clock inputs and outputs can later be extended to support USB4.
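For illustration only (this snippet is not part of the series; the node labels, unit addresses and consumer properties are placeholders), a consumer would then select the USB3 or DP part of the PHY purely by index, using the constants from the new dt-bindings header:

    #include <dt-bindings/phy/phy-qcom-qmp.h>

    usb_1_dwc3: usb@a600000 {
            /* USB3 part of the combo PHY, selected via #phy-cells = <1> */
            phys = <&usb_1_qmpphy QMP_USB43DP_USB3_PHY>;
            phy-names = "usb3-phy";
    };

    mdss_dp0: displayport-controller@ae90000 {
            /* DP part of the same PHY */
            phys = <&usb_1_qmpphy QMP_USB43DP_DP_PHY>;
            phy-names = "dp";
    };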
Also note that the current binding is simply removed instead of being deprecated as it was only recently merged and would not allow for supporting DP mode. Reviewed-by: Rob Herring Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../phy/qcom,sc7180-qmp-usb3-dp-phy.yaml | 12 --- .../phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml | 99 +++++++++++++++++++ include/dt-bindings/phy/phy-qcom-qmp.h | 20 ++++ 3 files changed, 119 insertions(+), 12 deletions(-) create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml create mode 100644 include/dt-bindings/phy/phy-qcom-qmp.h diff --git a/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml index 8afc5e815ae8e..d9d0ab90edb1b 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml @@ -23,7 +23,6 @@ properties: - qcom,sc7180-qmp-usb3-dp-phy - qcom,sc7280-qmp-usb3-dp-phy - qcom,sc8180x-qmp-usb3-dp-phy - - qcom,sc8280xp-qmp-usb43dp-phy - qcom,sdm845-qmp-usb3-dp-phy - qcom,sm8250-qmp-usb3-dp-phy reg: @@ -169,17 +168,6 @@ required: additionalProperties: false -allOf: - - if: - properties: - compatible: - contains: - enum: - - qcom,sc8280xp-qmp-usb43dp-phy - then: - required: - - power-domains - examples: - | #include diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml new file mode 100644 index 0000000000000..6f31693d98689 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm QMP USB4-USB3-DP PHY controller (SC8280XP) + +maintainers: + - Vinod Koul + +description: + The QMP PHY controller supports physical layer functionality for a number of + controllers on Qualcomm chipsets, such as, PCIe, UFS and USB. 
+ +properties: + compatible: + enum: + - qcom,sc8280xp-qmp-usb43dp-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 4 + + clock-names: + items: + - const: aux + - const: ref + - const: com_aux + - const: usb3_pipe + + power-domains: + maxItems: 1 + + resets: + maxItems: 2 + + reset-names: + items: + - const: phy + - const: common + + vdda-phy-supply: true + + vdda-pll-supply: true + + "#clock-cells": + const: 1 + description: + See include/dt-bindings/dt-bindings/phy/phy-qcom-qmp.h + + "#phy-cells": + const: 1 + description: + See include/dt-bindings/dt-bindings/phy/phy-qcom-qmp.h + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + - resets + - reset-names + - vdda-phy-supply + - vdda-pll-supply + - "#clock-cells" + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + + phy@88eb000 { + compatible = "qcom,sc8280xp-qmp-usb43dp-phy"; + reg = <0x088eb000 0x4000>; + + clocks = <&gcc GCC_USB3_PRIM_PHY_AUX_CLK>, + <&gcc GCC_USB4_EUD_CLKREF_CLK>, + <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>, + <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; + clock-names = "aux", "ref", "com_aux", "usb3_pipe"; + + power-domains = <&gcc USB30_PRIM_GDSC>; + + resets = <&gcc GCC_USB3_PHY_PRIM_BCR>, + <&gcc GCC_USB4_DP_PHY_PRIM_BCR>; + reset-names = "phy", "common"; + + vdda-phy-supply = <&vreg_l9d>; + vdda-pll-supply = <&vreg_l4d>; + + #clock-cells = <1>; + #phy-cells = <1>; + }; diff --git a/include/dt-bindings/phy/phy-qcom-qmp.h b/include/dt-bindings/phy/phy-qcom-qmp.h new file mode 100644 index 0000000000000..4edec4c5b2241 --- /dev/null +++ b/include/dt-bindings/phy/phy-qcom-qmp.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Qualcomm QMP PHY constants + * + * Copyright (C) 2022 Linaro Limited + */ + +#ifndef _DT_BINDINGS_PHY_QMP +#define _DT_BINDINGS_PHY_QMP + +/* QMP USB4-USB3-DP clocks */ +#define QMP_USB43DP_USB3_PIPE_CLK 0 +#define QMP_USB43DP_DP_LINK_CLK 1 +#define QMP_USB43DP_DP_VCO_DIV_CLK 2 + +/* QMP USB4-USB3-DP PHYs */ +#define QMP_USB43DP_USB3_PHY 0 +#define QMP_USB43DP_DP_PHY 1 + +#endif /* _DT_BINDINGS_PHY_QMP */ -- GitLab From 774903ca6c499887f554234bb019c91aa0a8f741 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:46 +0100 Subject: [PATCH 419/694] phy: qcom-qmp-combo: drop v4 reference-clock source The source clock for the reference clock should not be described by the devicetree and instead this relationship should be modelled in the clock driver. Drop the management of the source clock from the driver for SC8180X and SC8280XP. Note that support for the former is not yet in mainline. Also note that the binding has never been updated to describe the v4 clocks for SC8180X. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index c5d8f8bfaaaa1..5da42a4e5bf62 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -945,7 +945,7 @@ static const char * const qmp_v3_phy_clk_l[] = { }; static const char * const qmp_v4_phy_clk_l[] = { - "aux", "ref_clk_src", "ref", "com_aux", + "aux", "ref", "com_aux", }; /* the primary usb3 phy on sm8250 doesn't have a ref clock */ -- GitLab From b3982f2144e10bd542189e38cd47709e55389606 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:47 +0100 Subject: [PATCH 420/694] phy: qcom-qmp-combo: restructure PHY creation In preparation for supporting devicetree bindings which do not use child nodes, move the PHY creation to probe() proper and parse the serdes, dp_com and dp_serdes resources in a dedicated legacy devicetree helper. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 89 ++++++++++++----------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5da42a4e5bf62..85def6560e436 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2471,11 +2471,9 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } -static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np) +static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np) { struct device *dev = qmp->dev; - struct phy *generic_phy; - int ret; /* * Get memory resources from the DP child node: @@ -2496,25 +2494,13 @@ static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np) if (IS_ERR(qmp->dp_tx2)) return PTR_ERR(qmp->dp_tx2); - generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create DP PHY: %d\n", ret); - return ret; - } - - qmp->dp_phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); - return 0; } -static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np) +static int qmp_combo_parse_dt_lecacy_usb(struct qmp_combo *qmp, struct device_node *np) { const struct qmp_phy_cfg *cfg = qmp->cfg; struct device *dev = qmp->dev; - struct phy *generic_phy; - int ret; /* * Get memory resources from the USB child node: @@ -2556,15 +2542,34 @@ static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np) "failed to get pipe clock\n"); } - generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops); - if (IS_ERR(generic_phy)) { - ret = PTR_ERR(generic_phy); - dev_err(dev, "failed to create USB PHY: %d\n", ret); + return 0; +} + +static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node *usb_np, + struct device_node *dp_np) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + int ret; + + qmp->serdes = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(qmp->serdes)) + 
return PTR_ERR(qmp->serdes); + + qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(qmp->dp_com)) + return PTR_ERR(qmp->dp_com); + + qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2); + if (IS_ERR(qmp->dp_serdes)) + return PTR_ERR(qmp->dp_serdes); + + ret = qmp_combo_parse_dt_lecacy_usb(qmp, usb_np); + if (ret) return ret; - } - qmp->usb_phy = generic_phy; - phy_set_drvdata(generic_phy, qmp); + ret = qmp_combo_parse_dt_lecacy_dp(qmp, dp_np); + if (ret) + return ret; return 0; } @@ -2587,18 +2592,6 @@ static int qmp_combo_probe(struct platform_device *pdev) if (!qmp->cfg) return -EINVAL; - qmp->serdes = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qmp->serdes)) - return PTR_ERR(qmp->serdes); - - qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(qmp->dp_com)) - return PTR_ERR(qmp->dp_com); - - qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2); - if (IS_ERR(qmp->dp_serdes)) - return PTR_ERR(qmp->dp_serdes); - mutex_init(&qmp->phy_mutex); ret = qmp_combo_clk_init(qmp); @@ -2623,6 +2616,10 @@ static int qmp_combo_probe(struct platform_device *pdev) return -EINVAL; } + ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np); + if (ret) + goto err_node_put; + pm_runtime_set_active(dev); ret = devm_pm_runtime_enable(dev); if (ret) @@ -2633,21 +2630,31 @@ static int qmp_combo_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = qmp_combo_create_usb(qmp, usb_np); + ret = phy_pipe_clk_register(qmp, usb_np); if (ret) goto err_node_put; - ret = phy_pipe_clk_register(qmp, usb_np); + ret = phy_dp_clks_register(qmp, dp_np); if (ret) goto err_node_put; - ret = qmp_combo_create_dp(qmp, dp_np); - if (ret) + qmp->usb_phy = devm_phy_create(dev, usb_np, &qmp_combo_usb_phy_ops); + if (IS_ERR(qmp->usb_phy)) { + ret = PTR_ERR(qmp->usb_phy); + dev_err(dev, "failed to create USB PHY: %d\n", ret); goto err_node_put; + } - ret = phy_dp_clks_register(qmp, dp_np); - if (ret) + phy_set_drvdata(qmp->usb_phy, qmp); + + qmp->dp_phy = devm_phy_create(dev, dp_np, &qmp_combo_dp_phy_ops); + if (IS_ERR(qmp->dp_phy)) { + ret = PTR_ERR(qmp->dp_phy); + dev_err(dev, "failed to create DP PHY: %d\n", ret); goto err_node_put; + } + + phy_set_drvdata(qmp->dp_phy, qmp); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); -- GitLab From 0dd521d593ade3e8494d29abb653fda5bec5d508 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:48 +0100 Subject: [PATCH 421/694] phy: qcom-qmp-combo: generate pipe clock name In preparation for supporting devicetree bindings which do not use child nodes, generate also the USB3 pipe clock name based on the platform device name as is done for the DP clocks. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-6-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 85def6560e436..7434955c88989 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2247,18 +2247,15 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) { struct clk_fixed_rate *fixed; struct clk_init_data init = { }; + char name[64]; int ret; - ret = of_property_read_string(np, "clock-output-names", &init.name); - if (ret) { - dev_err(qmp->dev, "%pOFn: No clock-output-names\n", np); - return ret; - } - fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL); if (!fixed) return -ENOMEM; + snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev)); + init.name = name; init.ops = &clk_fixed_rate_ops; /* controllers using QMP phys use 125MHz pipe clock interface */ -- GitLab From ee81f2eb0ee0c99a109f91a9617a8d7698479181 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:49 +0100 Subject: [PATCH 422/694] phy: qcom-qmp-combo: drop redundant clock structure Drop the unnecessary DP clock structure and instead store the clocks directly in the driver data. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-7-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 52 ++++++++--------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7434955c88989..ebfefecffd867 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -859,12 +859,6 @@ struct qmp_phy_cfg { }; -struct qmp_phy_dp_clks { - struct qmp_combo *qmp; - struct clk_hw dp_link_hw; - struct clk_hw dp_pixel_hw; -}; - struct qmp_combo { struct device *dev; @@ -900,7 +894,9 @@ struct qmp_combo { struct phy *dp_phy; unsigned int dp_aux_cfg; struct phy_configure_opts_dp dp_opts; - struct qmp_phy_dp_clks *dp_clks; + + struct clk_hw dp_link_hw; + struct clk_hw dp_pixel_hw; }; static void qmp_v3_dp_aux_init(struct qmp_combo *qmp); @@ -1387,7 +1383,6 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) { - const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div, status; unsigned long pixel_freq; @@ -1420,8 +1415,8 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) } writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); - clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); - clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); + clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); + clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); @@ -1518,7 +1513,6 @@ static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp) static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) { - const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks; const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div, status; unsigned long 
pixel_freq; @@ -1556,8 +1550,8 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) } writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); - clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000); - clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq); + clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); + clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); @@ -2342,12 +2336,10 @@ static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, static unsigned long qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { - const struct qmp_phy_dp_clks *dp_clks; const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; - dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_pixel_hw); - qmp = dp_clks->qmp; + qmp = container_of(hw, struct qmp_combo, dp_pixel_hw); dp_opts = &qmp->dp_opts; switch (dp_opts->link_rate) { @@ -2386,12 +2378,10 @@ static int qcom_qmp_dp_link_clk_determine_rate(struct clk_hw *hw, static unsigned long qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { - const struct qmp_phy_dp_clks *dp_clks; const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; - dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_link_hw); - qmp = dp_clks->qmp; + qmp = container_of(hw, struct qmp_combo, dp_link_hw); dp_opts = &qmp->dp_opts; switch (dp_opts->link_rate) { @@ -2413,7 +2403,7 @@ static const struct clk_ops qcom_qmp_dp_link_clk_ops = { static struct clk_hw * qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) { - struct qmp_phy_dp_clks *dp_clks = data; + struct qmp_combo *qmp = data; unsigned int idx = clkspec->args[0]; if (idx >= 2) { @@ -2422,42 +2412,34 @@ qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) } if (idx == 0) - return &dp_clks->dp_link_hw; + return &qmp->dp_link_hw; - return &dp_clks->dp_pixel_hw; + return &qmp->dp_pixel_hw; } static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) { struct clk_init_data init = { }; - struct qmp_phy_dp_clks *dp_clks; char name[64]; int ret; - dp_clks = devm_kzalloc(qmp->dev, sizeof(*dp_clks), GFP_KERNEL); - if (!dp_clks) - return -ENOMEM; - - dp_clks->qmp = qmp; - qmp->dp_clks = dp_clks; - snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev)); init.ops = &qcom_qmp_dp_link_clk_ops; init.name = name; - dp_clks->dp_link_hw.init = &init; - ret = devm_clk_hw_register(qmp->dev, &dp_clks->dp_link_hw); + qmp->dp_link_hw.init = &init; + ret = devm_clk_hw_register(qmp->dev, &qmp->dp_link_hw); if (ret) return ret; snprintf(name, sizeof(name), "%s::vco_div_clk", dev_name(qmp->dev)); init.ops = &qcom_qmp_dp_pixel_clk_ops; init.name = name; - dp_clks->dp_pixel_hw.init = &init; - ret = devm_clk_hw_register(qmp->dev, &dp_clks->dp_pixel_hw); + qmp->dp_pixel_hw.init = &init; + ret = devm_clk_hw_register(qmp->dev, &qmp->dp_pixel_hw); if (ret) return ret; - ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, dp_clks); + ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, qmp); if (ret) return ret; -- GitLab From 55b1c39b4990ebdab2faa2e4c06d17476d6d2d3c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:50 +0100 Subject: [PATCH 423/694] phy: qcom-qmp-combo: drop redundant clock allocation Since the QMP driver split, there is no reason to allocate the fixed-rate pipe clock structure separately from the driver data. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-8-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index ebfefecffd867..9b945a72ae9b8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -895,6 +895,7 @@ struct qmp_combo { unsigned int dp_aux_cfg; struct phy_configure_opts_dp dp_opts; + struct clk_fixed_rate pipe_clk_fixed; struct clk_hw dp_link_hw; struct clk_hw dp_pixel_hw; }; @@ -2239,15 +2240,11 @@ static void phy_clk_release_provider(void *res) */ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) { - struct clk_fixed_rate *fixed; + struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed; struct clk_init_data init = { }; char name[64]; int ret; - fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL); - if (!fixed) - return -ENOMEM; - snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev)); init.name = name; init.ops = &clk_fixed_rate_ops; -- GitLab From 74401c85fb3b134d884d5de968c66784527d12d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:51 +0100 Subject: [PATCH 424/694] phy: qcom-qmp-combo: add clock registration helper In preparation for supporting devicetree bindings which do not use child nodes, add a clock registration helper to handle the registration of both the USB and DP clocks. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-9-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9b945a72ae9b8..1079a16b45f68 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2447,6 +2447,22 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); } +static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np, + struct device_node *dp_np) +{ + int ret; + + ret = phy_pipe_clk_register(qmp, usb_np); + if (ret) + return ret; + + ret = phy_dp_clks_register(qmp, dp_np); + if (ret) + return ret; + + return 0; +} + static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np) { struct device *dev = qmp->dev; @@ -2606,11 +2622,7 @@ static int qmp_combo_probe(struct platform_device *pdev) */ pm_runtime_forbid(dev); - ret = phy_pipe_clk_register(qmp, usb_np); - if (ret) - goto err_node_put; - - ret = phy_dp_clks_register(qmp, dp_np); + ret = qmp_combo_register_clocks(qmp, usb_np, dp_np); if (ret) goto err_node_put; -- GitLab From ce51f7a70a3bbc20c07079c06e7721cabfe34dd9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:52 +0100 Subject: [PATCH 425/694] phy: qcom-qmp-combo: separate clock and provider registration In preparation for supporting devicetree bindings which do not use child nodes, separate clock registration from clock-provider registration. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-10-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 44 +++++++++++------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 1079a16b45f68..89a5b51c770d0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2243,7 +2243,6 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed; struct clk_init_data init = { }; char name[64]; - int ret; snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev)); init.name = name; @@ -2253,19 +2252,7 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) fixed->fixed_rate = 125000000; fixed->hw.init = &init; - ret = devm_clk_hw_register(qmp->dev, &fixed->hw); - if (ret) - return ret; - - ret = of_clk_add_hw_provider(np, of_clk_hw_simple_get, &fixed->hw); - if (ret) - return ret; - - /* - * Roll a devm action because the clock provider is the child node, but - * the child node is not actually a device. - */ - return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); + return devm_clk_hw_register(qmp->dev, &fixed->hw); } /* @@ -2436,15 +2423,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) if (ret) return ret; - ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, qmp); - if (ret) - return ret; - - /* - * Roll a devm action because the clock provider is the child node, but - * the child node is not actually a device. - */ - return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np); + return 0; } static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np, @@ -2460,7 +2439,24 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node * if (ret) return ret; - return 0; + ret = of_clk_add_hw_provider(usb_np, of_clk_hw_simple_get, + &qmp->pipe_clk_fixed.hw); + if (ret) + return ret; + + /* + * Roll a devm action because the clock provider is the child node, but + * the child node is not actually a device. + */ + ret = devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, usb_np); + if (ret) + return ret; + + ret = of_clk_add_hw_provider(dp_np, qcom_qmp_dp_clks_hw_get, qmp); + if (ret) + return ret; + + return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, dp_np); } static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np) -- GitLab From b71bf1ebe936cc63983e5339d218918ed56e9804 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:53 +0100 Subject: [PATCH 426/694] phy: qcom-qmp-combo: clean up DP clock callbacks Clean up the DP clock callbacks somewhat by dropping the redundant "qcom" prefix and removing line breaks after type specifiers. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-11-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 33 ++++++++++------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 89a5b51c770d0..fb3705b008233 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2304,8 +2304,7 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np) * for DP pixel clock * */ -static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, - struct clk_rate_request *req) +static int qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { switch (req->rate) { case 1620000000UL / 2: @@ -2317,8 +2316,7 @@ static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, } } -static unsigned long -qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) +static unsigned long qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; @@ -2340,13 +2338,12 @@ qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) } } -static const struct clk_ops qcom_qmp_dp_pixel_clk_ops = { - .determine_rate = qcom_qmp_dp_pixel_clk_determine_rate, - .recalc_rate = qcom_qmp_dp_pixel_clk_recalc_rate, +static const struct clk_ops qmp_dp_pixel_clk_ops = { + .determine_rate = qmp_dp_pixel_clk_determine_rate, + .recalc_rate = qmp_dp_pixel_clk_recalc_rate, }; -static int qcom_qmp_dp_link_clk_determine_rate(struct clk_hw *hw, - struct clk_rate_request *req) +static int qmp_dp_link_clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { switch (req->rate) { case 162000000: @@ -2359,8 +2356,7 @@ static int qcom_qmp_dp_link_clk_determine_rate(struct clk_hw *hw, } } -static unsigned long -qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) +static unsigned long qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { const struct qmp_combo *qmp; const struct phy_configure_opts_dp *dp_opts; @@ -2379,13 +2375,12 @@ qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) } } -static const struct clk_ops qcom_qmp_dp_link_clk_ops = { - .determine_rate = qcom_qmp_dp_link_clk_determine_rate, - .recalc_rate = qcom_qmp_dp_link_clk_recalc_rate, +static const struct clk_ops qmp_dp_link_clk_ops = { + .determine_rate = qmp_dp_link_clk_determine_rate, + .recalc_rate = qmp_dp_link_clk_recalc_rate, }; -static struct clk_hw * -qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) +static struct clk_hw *qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data) { struct qmp_combo *qmp = data; unsigned int idx = clkspec->args[0]; @@ -2408,7 +2403,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) int ret; snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev)); - init.ops = &qcom_qmp_dp_link_clk_ops; + init.ops = &qmp_dp_link_clk_ops; init.name = name; qmp->dp_link_hw.init = &init; ret = devm_clk_hw_register(qmp->dev, &qmp->dp_link_hw); @@ -2416,7 +2411,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return ret; snprintf(name, sizeof(name), "%s::vco_div_clk", dev_name(qmp->dev)); - init.ops = &qcom_qmp_dp_pixel_clk_ops; + 
init.ops = &qmp_dp_pixel_clk_ops; init.name = name; qmp->dp_pixel_hw.init = &init; ret = devm_clk_hw_register(qmp->dev, &qmp->dp_pixel_hw); @@ -2452,7 +2447,7 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node * if (ret) return ret; - ret = of_clk_add_hw_provider(dp_np, qcom_qmp_dp_clks_hw_get, qmp); + ret = of_clk_add_hw_provider(dp_np, qmp_dp_clks_hw_get, qmp); if (ret) return ret; -- GitLab From 9e5b59ea6c216d9b36e3250c2efa081ab4ea2ff5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:54 +0100 Subject: [PATCH 427/694] phy: qcom-qmp-combo: rename common-register pointers The common registers are shared by the USB and DP parts of the PHY so drop the misleading "dp" prefix from the corresponding pointers. Note that the "DP" prefix could also be dropped from the corresponding defines, but leave that in place for now. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-12-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index fb3705b008233..5777bd1f76b38 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -864,7 +864,7 @@ struct qmp_combo { const struct qmp_phy_cfg *cfg; - void __iomem *dp_com; + void __iomem *com; void __iomem *serdes; void __iomem *tx; @@ -1767,7 +1767,7 @@ static int qmp_combo_dp_calibrate(struct phy *phy) static int qmp_combo_com_init(struct qmp_combo *qmp) { const struct qmp_phy_cfg *cfg = qmp->cfg; - void __iomem *dp_com = qmp->dp_com; + void __iomem *com = qmp->com; int ret; mutex_lock(&qmp->phy_mutex); @@ -1798,25 +1798,25 @@ static int qmp_combo_com_init(struct qmp_combo *qmp) if (ret) goto err_assert_reset; - qphy_setbits(dp_com, QPHY_V3_DP_COM_POWER_DOWN_CTRL, SW_PWRDN); + qphy_setbits(com, QPHY_V3_DP_COM_POWER_DOWN_CTRL, SW_PWRDN); /* override hardware control for reset of qmp phy */ - qphy_setbits(dp_com, QPHY_V3_DP_COM_RESET_OVRD_CTRL, + qphy_setbits(com, QPHY_V3_DP_COM_RESET_OVRD_CTRL, SW_DPPHY_RESET_MUX | SW_DPPHY_RESET | SW_USB3PHY_RESET_MUX | SW_USB3PHY_RESET); /* Default type-c orientation, i.e CC1 */ - qphy_setbits(dp_com, QPHY_V3_DP_COM_TYPEC_CTRL, 0x02); + qphy_setbits(com, QPHY_V3_DP_COM_TYPEC_CTRL, 0x02); - qphy_setbits(dp_com, QPHY_V3_DP_COM_PHY_MODE_CTRL, USB3_MODE | DP_MODE); + qphy_setbits(com, QPHY_V3_DP_COM_PHY_MODE_CTRL, USB3_MODE | DP_MODE); /* bring both QMP USB and QMP DP PHYs PCS block out of reset */ - qphy_clrbits(dp_com, QPHY_V3_DP_COM_RESET_OVRD_CTRL, + qphy_clrbits(com, QPHY_V3_DP_COM_RESET_OVRD_CTRL, SW_DPPHY_RESET_MUX | SW_DPPHY_RESET | SW_USB3PHY_RESET_MUX | SW_USB3PHY_RESET); - qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); - qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); + qphy_clrbits(com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); + qphy_clrbits(com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); qphy_setbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN); @@ -2538,9 +2538,9 @@ static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node * if (IS_ERR(qmp->serdes)) return PTR_ERR(qmp->serdes); - qmp->dp_com = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(qmp->dp_com)) - return PTR_ERR(qmp->dp_com); + qmp->com = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(qmp->com)) + return PTR_ERR(qmp->com); 
qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2); if (IS_ERR(qmp->dp_serdes)) -- GitLab From 133836a7edf4e5783ab0caa669cdb94ab02b9b62 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:55 +0100 Subject: [PATCH 428/694] phy: qcom-qmp-combo: rename DP_PHY register pointer The DP_PHY registers have erroneously been referred to as "PCS" registers since DisplayPort support was added to the QMP drivers (including in the devicetree binding). Rename the corresponding pointer to match the register names. Note that the repeated "dp" in the field name is intentional and this DP register block is called "DP_PHY" (not just "PHY"). Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-13-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 139 +++++++++++----------- 1 file changed, 70 insertions(+), 69 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 5777bd1f76b38..b82bd0a221d6f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -878,7 +878,7 @@ struct qmp_combo { void __iomem *dp_serdes; void __iomem *dp_tx; void __iomem *dp_tx2; - void __iomem *dp_pcs; + void __iomem *dp_dp_phy; struct clk *pipe_clk; struct clk_bulk_data *clks; @@ -1252,20 +1252,20 @@ static void qmp_v3_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL, qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_LANE_0_1_PWRDN | DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN | @@ -1273,22 +1273,22 @@ static void qmp_v3_dp_aux_init(struct qmp_combo *qmp) QSERDES_V3_COM_CLKBUF_RX_DRIVE_L, qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN); - writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0x24, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xbb, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); + writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); + writel(0x24, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG7); + 
writel(0xbb, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG9); qmp->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qmp->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); + qmp->dp_dp_phy + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK); } static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp, @@ -1372,12 +1372,12 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) * if (lane_cnt == 4 || orientation == ORIENTATION_CC1) * val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; * if (orientation == ORIENTATION_CC2) - * writel(0x4c, qmp->dp_pcs + QSERDES_V3_DP_PHY_MODE); + * writel(0x4c, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_MODE); */ val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN; - writel(val, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); - writel(0x5c, qmp->dp_pcs + QSERDES_DP_PHY_MODE); + writel(0x5c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE); return reverse; } @@ -1390,8 +1390,8 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) qmp_combo_configure_dp_mode(qmp); - writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1414,16 +1414,16 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_VCO_DIV); clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); - writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x04, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2); + writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x05, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x09, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); writel(0x20, qmp->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL); @@ -1434,20 +1434,20 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp) 10000)) return -ETIMEDOUT; - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, 10000)) return -ETIMEDOUT; - writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - return readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS, + return readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V3_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1467,7 +1467,7 @@ static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp) qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); val = cfg1_settings[qmp->dp_aux_cfg]; - writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1476,27 +1476,27 @@ 
static void qmp_v4_dp_aux_init(struct qmp_combo *qmp) { writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, - qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); /* Turn on BIAS current for PHY/PLL */ writel(0x17, qmp->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN); - writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0); - writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); - writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3); - writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4); - writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5); - writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6); - writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7); - writel(0xb7, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8); - writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9); + writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG0); + writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2); + writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG3); + writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG4); + writel(0x26, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG5); + writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG6); + writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG7); + writel(0xb7, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG8); + writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG9); qmp->dp_aux_cfg = 0; writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK | PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK | PHY_AUX_REQ_ERR_MASK, - qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); + qmp->dp_dp_phy + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK); } static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp) @@ -1518,15 +1518,15 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) u32 phy_vco_div, status; unsigned long pixel_freq; - writel(0x0f, qmp->dp_pcs + QSERDES_V4_DP_PHY_CFG_1); + writel(0x0f, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_CFG_1); qmp_combo_configure_dp_mode(qmp); - writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); - writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2); + writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); + writel(0xa4, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2); - writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); - writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); + writel(0x05, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL); + writel(0x05, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL); switch (dp_opts->link_rate) { case 1620: @@ -1549,15 +1549,15 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_VCO_DIV); clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); - writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG); - writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x05, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); + writel(0x09, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); writel(0x20, qmp->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL); @@ -1582,16 +1582,16 @@ static int 
qmp_v45_configure_dp_phy(struct qmp_combo *qmp) 10000)) return -ETIMEDOUT; - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(0)) > 0), 500, 10000)) return -ETIMEDOUT; - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1640,11 +1640,11 @@ static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp) writel(drvr1_en, qmp->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN); writel(bias1_en, qmp->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1697,11 +1697,11 @@ static int qmp_v5_configure_dp_phy(struct qmp_combo *qmp) writel(drvr1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN); writel(bias1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN); - writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); udelay(2000); - writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG); + writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG); - if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS, + if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS, status, ((status & BIT(1)) > 0), 500, @@ -1733,7 +1733,7 @@ static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp) qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings); val = cfg1_settings[qmp->dp_aux_cfg]; - writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1); + writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1); return 0; } @@ -1906,7 +1906,7 @@ static int qmp_combo_dp_power_off(struct phy *phy) struct qmp_combo *qmp = phy_get_drvdata(phy); /* Assert DP PHY power down */ - writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL); + writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); return 0; } @@ -2463,15 +2463,16 @@ static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_nod * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2; * tx2 -> 3; rx2 -> 4 * - * Note that only tx/tx2 and pcs are used by the DP implementation. + * Note that only tx/tx2 and pcs (dp_phy) are used by the DP + * implementation. */ qmp->dp_tx = devm_of_iomap(dev, np, 0, NULL); if (IS_ERR(qmp->dp_tx)) return PTR_ERR(qmp->dp_tx); - qmp->dp_pcs = devm_of_iomap(dev, np, 2, NULL); - if (IS_ERR(qmp->dp_pcs)) - return PTR_ERR(qmp->dp_pcs); + qmp->dp_dp_phy = devm_of_iomap(dev, np, 2, NULL); + if (IS_ERR(qmp->dp_dp_phy)) + return PTR_ERR(qmp->dp_dp_phy); qmp->dp_tx2 = devm_of_iomap(dev, np, 3, NULL); if (IS_ERR(qmp->dp_tx2)) -- GitLab From 83a0bbe39b1797cab47665efcf689f774b42af88 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 21 Nov 2022 09:50:56 +0100 Subject: [PATCH 429/694] phy: qcom-qmp-combo: add support for updated sc8280xp binding Add support for the new SC8280XP binding. Note that the binding does not try to describe every register subregion and instead the driver holds the corresponding offsets. 
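As an illustration (not part of this patch; the node labels and unit addresses are placeholders), the single clock provider registered on the PHY node lets the clock controllers pick up the PHY output clocks by index as well:

    #include <dt-bindings/phy/phy-qcom-qmp.h>

    gcc: clock-controller@100000 {
            /* USB3 pipe clock fed back from the PHY */
            clocks = <&usb_1_qmpphy QMP_USB43DP_USB3_PIPE_CLK>;
    };

    dispcc: clock-controller@af00000 {
            /* DP link and VCO-divider clocks generated by the DP part */
            clocks = <&usb_1_qmpphy QMP_USB43DP_DP_LINK_CLK>,
                     <&usb_1_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>;
    };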
Also note that (possibly) unlike on earlier platforms, the TX registers are used by both the USB and DP implementation. Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221121085058.31213-14-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 145 ++++++++++++++++++++-- 1 file changed, 134 insertions(+), 11 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index b82bd0a221d6f..77052c66cf70b 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include "phy-qcom-qmp.h" @@ -798,7 +798,23 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = { struct qmp_combo; +struct qmp_combo_offsets { + u16 com; + u16 txa; + u16 rxa; + u16 txb; + u16 rxb; + u16 usb3_serdes; + u16 usb3_pcs_misc; + u16 usb3_pcs; + u16 usb3_pcs_usb; + u16 dp_serdes; + u16 dp_dp_phy; +}; + struct qmp_phy_cfg { + const struct qmp_combo_offsets *offsets; + /* Init sequence for PHY blocks - serdes, tx, rx, pcs */ const struct qmp_phy_init_tbl *serdes_tbl; int serdes_tbl_num; @@ -959,6 +975,20 @@ static const char * const sc7180_usb3phy_reset_l[] = { "phy", }; +static const struct qmp_combo_offsets qmp_combo_offsets_v5 = { + .com = 0x0000, + .txa = 0x0400, + .rxa = 0x0600, + .txb = 0x0a00, + .rxb = 0x0c00, + .usb3_serdes = 0x1000, + .usb3_pcs_misc = 0x1200, + .usb3_pcs = 0x1400, + .usb3_pcs_usb = 0x1700, + .dp_serdes = 0x2000, + .dp_dp_phy = 0x2200, +}; + static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = { .serdes_tbl = qmp_v3_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl), @@ -1098,6 +1128,8 @@ static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = { }; static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = { + .offsets = &qmp_combo_offsets_v5, + .serdes_tbl = sc8280xp_usb43dp_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl), .tx_tbl = sc8280xp_usb43dp_tx_tbl, @@ -1138,7 +1170,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = { .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), .regs = qmp_v4_usb3phy_regs_layout, - .pcs_usb_offset = 0x300, }; static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = { @@ -2421,6 +2452,22 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np) return 0; } +static struct clk_hw *qmp_combo_clk_hw_get(struct of_phandle_args *clkspec, void *data) +{ + struct qmp_combo *qmp = data; + + switch (clkspec->args[0]) { + case QMP_USB43DP_USB3_PIPE_CLK: + return &qmp->pipe_clk_fixed.hw; + case QMP_USB43DP_DP_LINK_CLK: + return &qmp->dp_link_hw; + case QMP_USB43DP_DP_VCO_DIV_CLK: + return &qmp->dp_pixel_hw; + } + + return ERR_PTR(-EINVAL); +} + static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np, struct device_node *dp_np) { @@ -2434,6 +2481,15 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node * if (ret) return ret; + /* + * Register a single provider for bindings without child nodes. + */ + if (usb_np == qmp->dev->of_node) + return devm_of_clk_add_hw_provider(qmp->dev, qmp_combo_clk_hw_get, qmp); + + /* + * Register multiple providers for legacy bindings with child nodes. 
+ */ ret = of_clk_add_hw_provider(usb_np, of_clk_hw_simple_get, &qmp->pipe_clk_fixed.hw); if (ret) @@ -2558,6 +2614,63 @@ static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node * return 0; } +static int qmp_combo_parse_dt(struct qmp_combo *qmp) +{ + struct platform_device *pdev = to_platform_device(qmp->dev); + const struct qmp_phy_cfg *cfg = qmp->cfg; + const struct qmp_combo_offsets *offs = cfg->offsets; + struct device *dev = qmp->dev; + void __iomem *base; + + if (!offs) + return -EINVAL; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + qmp->com = base + offs->com; + qmp->tx = base + offs->txa; + qmp->rx = base + offs->rxa; + qmp->tx2 = base + offs->txb; + qmp->rx2 = base + offs->rxb; + + qmp->serdes = base + offs->usb3_serdes; + qmp->pcs_misc = base + offs->usb3_pcs_misc; + qmp->pcs = base + offs->usb3_pcs; + qmp->pcs_usb = base + offs->usb3_pcs_usb; + + qmp->dp_serdes = base + offs->dp_serdes; + qmp->dp_tx = base + offs->txa; + qmp->dp_tx2 = base + offs->txb; + qmp->dp_dp_phy = base + offs->dp_dp_phy; + + qmp->pipe_clk = devm_clk_get(dev, "usb3_pipe"); + if (IS_ERR(qmp->pipe_clk)) { + return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk), + "failed to get usb3_pipe clock\n"); + } + + return 0; +} + +static struct phy *qmp_combo_phy_xlate(struct device *dev, struct of_phandle_args *args) +{ + struct qmp_combo *qmp = dev_get_drvdata(dev); + + if (args->args_count == 0) + return ERR_PTR(-EINVAL); + + switch (args->args[0]) { + case QMP_USB43DP_USB3_PHY: + return qmp->usb_phy; + case QMP_USB43DP_DP_PHY: + return qmp->dp_phy; + } + + return ERR_PTR(-EINVAL); +} + static int qmp_combo_probe(struct platform_device *pdev) { struct qmp_combo *qmp; @@ -2590,17 +2703,22 @@ static int qmp_combo_probe(struct platform_device *pdev) if (ret) return ret; + /* Check for legacy binding with child nodes. */ usb_np = of_get_child_by_name(dev->of_node, "usb3-phy"); - if (!usb_np) - return -EINVAL; + if (usb_np) { + dp_np = of_get_child_by_name(dev->of_node, "dp-phy"); + if (!dp_np) { + of_node_put(usb_np); + return -EINVAL; + } - dp_np = of_get_child_by_name(dev->of_node, "dp-phy"); - if (!dp_np) { - of_node_put(usb_np); - return -EINVAL; - } + ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np); + } else { + usb_np = of_node_get(dev->of_node); + dp_np = of_node_get(dev->of_node); - ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np); + ret = qmp_combo_parse_dt(qmp); + } if (ret) goto err_node_put; @@ -2636,7 +2754,12 @@ static int qmp_combo_probe(struct platform_device *pdev) phy_set_drvdata(qmp->dp_phy, qmp); - phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + dev_set_drvdata(dev, qmp); + + if (usb_np == dev->of_node) + phy_provider = devm_of_phy_provider_register(dev, qmp_combo_phy_xlate); + else + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); of_node_put(usb_np); of_node_put(dp_np); -- GitLab From 1446d03ec290760788b1868b5aa967383d86dd77 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:38:55 +0100 Subject: [PATCH 430/694] dt-bindings: phy: qcom,sc8280xp-qmp-usb3-uni: drop reference-clock source The source clock for the reference clock is not used by the PHY directly and should not be included in the devicetree binding. 
Fixes: e8e58e29a0c9 ("dt-bindings: phy: qcom,qmp-usb: fix sc8280xp binding") Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221111093857.11360-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml index ef080509747ae..16fce10382852 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml @@ -22,12 +22,11 @@ properties: maxItems: 1 clocks: - maxItems: 5 + maxItems: 4 clock-names: items: - const: aux - - const: ref_clk_src - const: ref - const: com_aux - const: pipe @@ -82,12 +81,10 @@ examples: reg = <0x088ef000 0x2000>; clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>, - <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_USB3_MP0_CLKREF_CLK>, <&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>, <&gcc GCC_USB3_MP_PHY_PIPE_0_CLK>; - clock-names = "aux", "ref_clk_src", "ref", "com_aux", - "pipe"; + clock-names = "aux", "ref", "com_aux", "pipe"; power-domains = <&gcc USB30_MP_GDSC>; -- GitLab From 3b41b61a2fe4174ba43fdb599c9d6accd35ac179 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:38:56 +0100 Subject: [PATCH 431/694] phy: qcom-qmp-usb: drop sc8280xp reference-clock source The source clock for the reference clock is not used by the PHY directly and should not be described by the devicetree (instead this relationship should be modelled in the clock driver). Drop the driver management of the reference-clock source for SC8280XP. Once the other clock drivers have been updated, the corresponding change can be done also for the other QMP v4 platforms. 
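For context, the driver only requests the clocks named in the per-SoC clk_list table, so keeping sc8280xp on a list without "ref_clk_src" is all that is needed to stop it from claiming that clock. The following is a hedged sketch of how such a name table is typically turned into clk_bulk requests; it is simplified, not the exact qmp-usb init path, and the function name is made up:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/slab.h>

/* Turn a clock-name table (e.g. qmp_v4_ref_phy_clk_l) into bulk requests. */
static int example_clk_init(struct device *dev, const char * const *names,
			    int num, struct clk_bulk_data **out)
{
	struct clk_bulk_data *clks;
	int i;

	clks = devm_kcalloc(dev, num, sizeof(*clks), GFP_KERNEL);
	if (!clks)
		return -ENOMEM;

	for (i = 0; i < num; i++)
		clks[i].id = names[i];	/* "aux", "ref", "com_aux", ... */

	*out = clks;

	/* Every name in the table must resolve, so a trimmed table matters. */
	return devm_clk_bulk_get(dev, num, clks);
}
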
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221111093857.11360-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 55029ea63f731..9fbad6b1d3ab1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1523,6 +1523,10 @@ static const char * const qmp_v3_phy_clk_l[] = { }; static const char * const qmp_v4_phy_clk_l[] = { + "aux", "ref", "com_aux", +}; + +static const char * const qmp_v4_ref_phy_clk_l[] = { "aux", "ref_clk_src", "ref", "com_aux", }; @@ -1729,8 +1733,8 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8150_usb3_pcs_tbl), .pcs_usb_tbl = sm8150_usb3_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8150_usb3_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_ref_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_ref_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, @@ -1755,8 +1759,8 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8150_usb3_uniphy_pcs_tbl), .pcs_usb_tbl = sm8150_usb3_uniphy_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8150_usb3_uniphy_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_ref_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_ref_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, @@ -1806,8 +1810,8 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8250_usb3_uniphy_pcs_tbl), .pcs_usb_tbl = sm8250_usb3_uniphy_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8250_usb3_uniphy_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_ref_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_ref_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, @@ -1907,8 +1911,8 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = { .pcs_tbl_num = ARRAY_SIZE(sm8350_usb3_uniphy_pcs_tbl), .pcs_usb_tbl = sm8350_usb3_uniphy_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(sm8350_usb3_uniphy_pcs_usb_tbl), - .clk_list = qmp_v4_phy_clk_l, - .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), + .clk_list = qmp_v4_ref_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v4_ref_phy_clk_l), .reset_list = msm8996_usb3phy_reset_l, .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, -- GitLab From 905abf1229efd33aa57f3f65881c378770dfbb65 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:42:37 +0100 Subject: [PATCH 432/694] phy: qcom-qmp: drop unused type header The PHY type defines are no longer used in the PCIe, UFS and USB QMP drivers so drop the corresponding include. 
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221111094239.11547-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 2 -- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 -- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 2 -- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 -- 4 files changed, 8 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c index ff198d846fd2d..a088477e274f7 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c @@ -20,8 +20,6 @@ #include #include -#include - #include "phy-qcom-qmp.h" /* QPHY_SW_RESET bit */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 47cccc4b35b22..53dde20a4ce06 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -23,8 +23,6 @@ #include #include -#include - #include "phy-qcom-qmp.h" /* QPHY_SW_RESET bit */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 189103d1bd180..318eea35b9721 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -20,8 +20,6 @@ #include #include -#include - #include "phy-qcom-qmp.h" /* QPHY_SW_RESET bit */ diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 9fbad6b1d3ab1..64b9472a1a702 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -20,8 +20,6 @@ #include #include -#include - #include "phy-qcom-qmp.h" /* QPHY_SW_RESET bit */ -- GitLab From 64e1f12b2658c1abca55cffd9413f2d3c3bbfa8f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:42:38 +0100 Subject: [PATCH 433/694] phy: qcom-qmp-usb: drop redundant clock allocation Since the QMP driver split, there is no reason to allocate the fixed-rate pipe clock structure separately from the driver data. 
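The pattern relied on here is to embed the struct clk_fixed_rate in the driver data and register its clk_hw directly. Roughly, as a hedged sketch with made-up names (not the driver's exact code):

#include <linux/clk-provider.h>
#include <linux/device.h>

struct example_phy {
	struct device *dev;
	struct clk_fixed_rate pipe_clk_fixed;	/* embedded, no separate kzalloc */
};

static int example_register_pipe_clk(struct example_phy *p)
{
	struct clk_fixed_rate *fixed = &p->pipe_clk_fixed;
	struct clk_init_data init = { };

	init.name = "example_pipe_clk_src";
	init.ops = &clk_fixed_rate_ops;

	/* Controllers using QMP PHYs use a 125 MHz pipe clock interface. */
	fixed->fixed_rate = 125000000;
	fixed->hw.init = &init;

	return devm_clk_hw_register(p->dev, &fixed->hw);
}

This ties the clock's lifetime to the same devres allocation as the rest of the driver state, which is why the separate devm_kzalloc() can go.
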
Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221111094239.11547-3-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 64b9472a1a702..4aa338fc4643c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1485,6 +1485,8 @@ struct qmp_usb { enum phy_mode mode; struct phy *phy; + + struct clk_fixed_rate pipe_clk_fixed; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -2357,7 +2359,7 @@ static void phy_clk_release_provider(void *res) */ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np) { - struct clk_fixed_rate *fixed; + struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed; struct clk_init_data init = { }; int ret; @@ -2367,10 +2369,6 @@ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np) return ret; } - fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL); - if (!fixed) - return -ENOMEM; - init.ops = &clk_fixed_rate_ops; /* controllers using QMP phys use 125MHz pipe clock interface */ -- GitLab From e8511f407b078330dfcca0c7200e72b7638b6e17 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 11 Nov 2022 10:42:39 +0100 Subject: [PATCH 434/694] phy: qcom-qmp-pcie: drop redundant clock allocation Since the QMP driver split, there is no reason to allocate the fixed-rate pipe clock structure separately from the driver data. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221111094239.11547-4-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 53dde20a4ce06..1b136a87053f8 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1550,6 +1550,8 @@ struct qmp_pcie { struct phy *phy; int mode; + + struct clk_fixed_rate pipe_clk_fixed; }; static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) @@ -2416,7 +2418,7 @@ static void phy_clk_release_provider(void *res) */ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) { - struct clk_fixed_rate *fixed; + struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed; struct clk_init_data init = { }; int ret; @@ -2426,10 +2428,6 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np) return ret; } - fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL); - if (!fixed) - return -ENOMEM; - init.ops = &clk_fixed_rate_ops; /* -- GitLab From 5a0d2df462568486b85a88ed2c88ffbfa1645cd1 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:06 -0600 Subject: [PATCH 435/694] dt-bindings: sun6i-a31-mipi-dphy: Add the interrupts property The sun6i DPHY can generate several interrupts, mostly for reporting error conditions, but also for detecting BTA and UPLS sequences. Document this capability in order to accurately describe the hardware. The DPHY has no interrupt number provided in the vendor documentation because its interrupt line is shared with the DSI controller. A trivial interrupt handler was used to verify that interrupts were in fact generated by the DPHY and not the DSI controller. 
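For illustration, the kind of throwaway handler used for that verification could look roughly like the sketch below. This is a hedged debugging aid with made-up names, not something added by this change, and it only works if the other user of the line also allows sharing. Returning IRQ_NONE leaves the interrupt for the DSI controller sharing the line, while the rate-limited message confirms that the line does fire:

#include <linux/interrupt.h>
#include <linux/platform_device.h>

static irqreturn_t dphy_debug_irq(int irq, void *data)
{
	struct device *dev = data;

	dev_dbg_ratelimited(dev, "DPHY/DSI interrupt line fired\n");
	return IRQ_NONE;	/* let the sharing DSI handler claim it */
}

static int dphy_debug_irq_init(struct platform_device *pdev)
{
	int irq = platform_get_irq(pdev, 0);

	if (irq < 0)
		return irq;

	return devm_request_irq(&pdev->dev, irq, dphy_debug_irq, IRQF_SHARED,
				"sun6i-dphy-debug", &pdev->dev);
}
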
Signed-off-by: Samuel Holland Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221114022113.31694-2-samuel@sholland.org Signed-off-by: Vinod Koul --- .../bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml index dfb6a89935351..4c45d7fcc8536 100644 --- a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml +++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml @@ -24,6 +24,9 @@ properties: reg: maxItems: 1 + interrupts: + maxItems: 1 + clocks: items: - description: Bus Clock @@ -53,6 +56,7 @@ required: - "#phy-cells" - compatible - reg + - interrupts - clocks - clock-names - resets @@ -61,9 +65,12 @@ additionalProperties: false examples: - | + #include + dphy0: d-phy@1ca1000 { compatible = "allwinner,sun6i-a31-mipi-dphy"; reg = <0x01ca1000 0x1000>; + interrupts = ; clocks = <&ccu 23>, <&ccu 97>; clock-names = "bus", "mod"; resets = <&ccu 4>; -- GitLab From e7a838694185c7d0965baa9ed2515f2e0ff8d502 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:09 -0600 Subject: [PATCH 436/694] dt-bindings: sun6i-a31-mipi-dphy: Add the A100 DPHY variant A100 features an updated DPHY, which moves PLL control inside the DPHY register space. (Previously PLL-MIPI was controlled from the CCU. This does not affect the "clocks" property because the link between PLL-MIPI and the DPHY was never represented in the devicetree.) It also requires a modified analog power-on sequence. Finally, the new DPHY adds support for operating as an LVDS PHY. D1 uses this same variant. Acked-by: Krzysztof Kozlowski Reviewed-by: Paul Kocialkowski Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-5-samuel@sholland.org Signed-off-by: Vinod Koul --- .../bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml index 4c45d7fcc8536..fe9702e7bdd8a 100644 --- a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml +++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml @@ -17,9 +17,13 @@ properties: compatible: oneOf: - const: allwinner,sun6i-a31-mipi-dphy + - const: allwinner,sun50i-a100-mipi-dphy - items: - const: allwinner,sun50i-a64-mipi-dphy - const: allwinner,sun6i-a31-mipi-dphy + - items: + - const: allwinner,sun20i-d1-mipi-dphy + - const: allwinner,sun50i-a100-mipi-dphy reg: maxItems: 1 -- GitLab From a709ae51e22802822de85ec7b672cf1cc5412fc0 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:10 -0600 Subject: [PATCH 437/694] phy: allwinner: phy-sun6i-mipi-dphy: Make RX support optional While all variants of the DPHY likely support RX mode, the new variant in the A100 is not used in this direction by the BSP, and it has some analog register changes, so its RX power-on sequence is unknown. To be safe, limit RX support to variants where the power-on sequence is known. 
Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-6-samuel@sholland.org Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 26 +++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c index 3900f16508515..7d7322670a83d 100644 --- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c +++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c @@ -114,6 +114,10 @@ enum sun6i_dphy_direction { SUN6I_DPHY_DIRECTION_RX, }; +struct sun6i_dphy_variant { + bool rx_supported; +}; + struct sun6i_dphy { struct clk *bus_clk; struct clk *mod_clk; @@ -123,6 +127,7 @@ struct sun6i_dphy { struct phy *phy; struct phy_configure_opts_mipi_dphy config; + const struct sun6i_dphy_variant *variant; enum sun6i_dphy_direction direction; }; @@ -409,6 +414,10 @@ static int sun6i_dphy_probe(struct platform_device *pdev) if (!dphy) return -ENOMEM; + dphy->variant = device_get_match_data(&pdev->dev); + if (!dphy->variant) + return -EINVAL; + regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(regs)) { dev_err(&pdev->dev, "Couldn't map the DPHY encoder registers\n"); @@ -445,8 +454,14 @@ static int sun6i_dphy_probe(struct platform_device *pdev) ret = of_property_read_string(pdev->dev.of_node, "allwinner,direction", &direction); - if (!ret && !strncmp(direction, "rx", 2)) + if (!ret && !strncmp(direction, "rx", 2)) { + if (!dphy->variant->rx_supported) { + dev_err(&pdev->dev, "RX not supported on this variant\n"); + return -EOPNOTSUPP; + } + dphy->direction = SUN6I_DPHY_DIRECTION_RX; + } phy_set_drvdata(dphy->phy, dphy); phy_provider = devm_of_phy_provider_register(&pdev->dev, of_phy_simple_xlate); @@ -454,8 +469,15 @@ static int sun6i_dphy_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(phy_provider); } +static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = { + .rx_supported = true, +}; + static const struct of_device_id sun6i_dphy_of_table[] = { - { .compatible = "allwinner,sun6i-a31-mipi-dphy" }, + { + .compatible = "allwinner,sun6i-a31-mipi-dphy", + .data = &sun6i_a31_mipi_dphy_variant, + }, { } }; MODULE_DEVICE_TABLE(of, sun6i_dphy_of_table); -- GitLab From cb7f49a31597066b25c9bc6a0bf0781454dd4d2b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:11 -0600 Subject: [PATCH 438/694] phy: allwinner: phy-sun6i-mipi-dphy: Set the enable bit last The A100 variant of the DPHY requires configuring the analog registers before setting the global enable bit. Since this order also works on the other variants, always use it, to minimize the differences between them. 
Reviewed-by: Paul Kocialkowski Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-7-samuel@sholland.org Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c index 7d7322670a83d..a2afedc399fda 100644 --- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c +++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c @@ -183,10 +183,6 @@ static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); - regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG, - SUN6I_DPHY_GCTL_LANE_NUM(dphy->config.lanes) | - SUN6I_DPHY_GCTL_EN); - regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, SUN6I_DPHY_ANA0_REG_PWS | SUN6I_DPHY_ANA0_REG_DMPC | @@ -244,6 +240,10 @@ static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK, SUN6I_DPHY_ANA2_EN_P2S_CPU(lanes_mask)); + regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG, + SUN6I_DPHY_GCTL_LANE_NUM(dphy->config.lanes) | + SUN6I_DPHY_GCTL_EN); + return 0; } -- GitLab From 3fd490a7197857dc3aa409e56e31deaeab097c5f Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:12 -0600 Subject: [PATCH 439/694] phy: allwinner: phy-sun6i-mipi-dphy: Add a variant power-on hook The A100 variant uses the same values for the timing registers, and it uses the same final power-on sequence, but it needs a different analog register configuration in the middle. Support this by moving the variant-specific parts to a hook provided by the variant. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-8-samuel@sholland.org Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 59 ++++++++++++--------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c index a2afedc399fda..ac144ee0f0a62 100644 --- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c +++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c @@ -114,7 +114,10 @@ enum sun6i_dphy_direction { SUN6I_DPHY_DIRECTION_RX, }; +struct sun6i_dphy; + struct sun6i_dphy_variant { + void (*tx_power_on)(struct sun6i_dphy *dphy); bool rx_supported; }; @@ -156,33 +159,10 @@ static int sun6i_dphy_configure(struct phy *phy, union phy_configure_opts *opts) return 0; } -static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) +static void sun6i_a31_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy) { u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0); - regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG, - SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG, - SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) | - SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) | - SUN6I_DPHY_TX_TIME0_HS_TRAIL(10)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG, - SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) | - SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) | - SUN6I_DPHY_TX_TIME1_CLK_PRE(3) | - SUN6I_DPHY_TX_TIME1_CLK_POST(10)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG, - SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG, - SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | - SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); - regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, SUN6I_DPHY_ANA0_REG_PWS | SUN6I_DPHY_ANA0_REG_DMPC | @@ -214,6 +194,36 @@ static int 
sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) SUN6I_DPHY_ANA3_EN_LDOC | SUN6I_DPHY_ANA3_EN_LDOD); udelay(1); +} + +static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) +{ + u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG, + SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG, + SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) | + SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) | + SUN6I_DPHY_TX_TIME0_HS_TRAIL(10)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG, + SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) | + SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) | + SUN6I_DPHY_TX_TIME1_CLK_PRE(3) | + SUN6I_DPHY_TX_TIME1_CLK_POST(10)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG, + SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG, + SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | + SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); + + dphy->variant->tx_power_on(dphy); regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG, SUN6I_DPHY_ANA3_EN_VTTC | @@ -470,6 +480,7 @@ static int sun6i_dphy_probe(struct platform_device *pdev) } static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = { + .tx_power_on = sun6i_a31_mipi_dphy_tx_power_on, .rx_supported = true, }; -- GitLab From 4d0c2165e64eec00c19b68b1abc83e57e9633db9 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 20:21:13 -0600 Subject: [PATCH 440/694] phy: allwinner: phy-sun6i-mipi-dphy: Add the A100 DPHY variant A100 features an updated DPHY, which moves PLL control inside the DPHY register space (previously the PLL was controlled from the CCU). It also requires a modified analog power-on sequence. This "combo PHY" can also be used as an LVDS PHY, but that is not yet supported by the driver. 
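As a worked example of the divider selection done in the power-on hook below (the helpers are the kernel's DIV_ROUND_UP() and order_base_2(); the 1.5 GHz HS clock rate is only an assumed illustration value):

/*
 *   mipi_symbol_rate = 1500000000 Hz
 *   DIV_ROUND_UP(1500000000, 264000000) = 6
 *   order_base_2(6)                     = 3    (next power of two is 8)
 *   div = 16 >> 3                       = 2
 *   n   = 1500000000 * 2 / 24000000     = 125  (24 MHz reference)
 *
 * which the hook then splits into the PLL register fields as
 *   P  = (div - 1) % 8 = 1
 *   M0 = (div - 1) / 8 = 0
 */
#include <linux/log2.h>
#include <linux/math.h>

static void example_a100_pll_params(unsigned long mipi_symbol_rate,
				    unsigned int *div, unsigned int *n)
{
	/* Same arithmetic as sun50i_a100_mipi_dphy_tx_power_on() below. */
	unsigned int d = 16 >> order_base_2(DIV_ROUND_UP(mipi_symbol_rate, 264000000));

	*div = d;
	*n = mipi_symbol_rate * d / 24000000;
}
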
Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114022113.31694-9-samuel@sholland.org Signed-off-by: Vinod Koul --- drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 143 +++++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c index ac144ee0f0a62..36eab95271b2d 100644 --- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c +++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c @@ -70,11 +70,19 @@ #define SUN6I_DPHY_ANA0_REG 0x4c #define SUN6I_DPHY_ANA0_REG_PWS BIT(31) +#define SUN6I_DPHY_ANA0_REG_PWEND BIT(30) +#define SUN6I_DPHY_ANA0_REG_PWENC BIT(29) #define SUN6I_DPHY_ANA0_REG_DMPC BIT(28) #define SUN6I_DPHY_ANA0_REG_DMPD(n) (((n) & 0xf) << 24) +#define SUN6I_DPHY_ANA0_REG_SRXDT(n) (((n) & 0xf) << 20) +#define SUN6I_DPHY_ANA0_REG_SRXCK(n) (((n) & 0xf) << 16) +#define SUN6I_DPHY_ANA0_REG_SDIV2 BIT(15) #define SUN6I_DPHY_ANA0_REG_SLV(n) (((n) & 7) << 12) #define SUN6I_DPHY_ANA0_REG_DEN(n) (((n) & 0xf) << 8) +#define SUN6I_DPHY_ANA0_REG_PLR(n) (((n) & 0xf) << 4) #define SUN6I_DPHY_ANA0_REG_SFB(n) (((n) & 3) << 2) +#define SUN6I_DPHY_ANA0_REG_RSD BIT(1) +#define SUN6I_DPHY_ANA0_REG_SELSCK BIT(0) #define SUN6I_DPHY_ANA1_REG 0x50 #define SUN6I_DPHY_ANA1_REG_VTTMODE BIT(31) @@ -97,8 +105,13 @@ #define SUN6I_DPHY_ANA3_EN_LDOR BIT(18) #define SUN6I_DPHY_ANA4_REG 0x5c +#define SUN6I_DPHY_ANA4_REG_EN_MIPI BIT(31) +#define SUN6I_DPHY_ANA4_REG_EN_COMTEST BIT(30) +#define SUN6I_DPHY_ANA4_REG_COMTEST(n) (((n) & 3) << 28) +#define SUN6I_DPHY_ANA4_REG_IB(n) (((n) & 3) << 25) #define SUN6I_DPHY_ANA4_REG_DMPLVC BIT(24) #define SUN6I_DPHY_ANA4_REG_DMPLVD(n) (((n) & 0xf) << 20) +#define SUN6I_DPHY_ANA4_REG_VTT_SET(n) (((n) & 0x7) << 17) #define SUN6I_DPHY_ANA4_REG_CKDV(n) (((n) & 0x1f) << 12) #define SUN6I_DPHY_ANA4_REG_TMSC(n) (((n) & 3) << 10) #define SUN6I_DPHY_ANA4_REG_TMSD(n) (((n) & 3) << 8) @@ -109,6 +122,56 @@ #define SUN6I_DPHY_DBG5_REG 0xf4 +#define SUN50I_DPHY_TX_SLEW_REG0 0xf8 +#define SUN50I_DPHY_TX_SLEW_REG1 0xfc +#define SUN50I_DPHY_TX_SLEW_REG2 0x100 + +#define SUN50I_DPHY_PLL_REG0 0x104 +#define SUN50I_DPHY_PLL_REG0_CP36_EN BIT(23) +#define SUN50I_DPHY_PLL_REG0_LDO_EN BIT(22) +#define SUN50I_DPHY_PLL_REG0_EN_LVS BIT(21) +#define SUN50I_DPHY_PLL_REG0_PLL_EN BIT(20) +#define SUN50I_DPHY_PLL_REG0_P(n) (((n) & 0xf) << 16) +#define SUN50I_DPHY_PLL_REG0_N(n) (((n) & 0xff) << 8) +#define SUN50I_DPHY_PLL_REG0_NDET BIT(7) +#define SUN50I_DPHY_PLL_REG0_TDIV BIT(6) +#define SUN50I_DPHY_PLL_REG0_M0(n) (((n) & 3) << 4) +#define SUN50I_DPHY_PLL_REG0_M1(n) ((n) & 0xf) + +#define SUN50I_DPHY_PLL_REG1 0x108 +#define SUN50I_DPHY_PLL_REG1_UNLOCK_MDSEL(n) (((n) & 3) << 14) +#define SUN50I_DPHY_PLL_REG1_LOCKMDSEL BIT(13) +#define SUN50I_DPHY_PLL_REG1_LOCKDET_EN BIT(12) +#define SUN50I_DPHY_PLL_REG1_VSETA(n) (((n) & 0x7) << 9) +#define SUN50I_DPHY_PLL_REG1_VSETD(n) (((n) & 0x7) << 6) +#define SUN50I_DPHY_PLL_REG1_LPF_SW BIT(5) +#define SUN50I_DPHY_PLL_REG1_ICP_SEL(n) (((n) & 3) << 3) +#define SUN50I_DPHY_PLL_REG1_ATEST_SEL(n) (((n) & 3) << 1) +#define SUN50I_DPHY_PLL_REG1_TEST_EN BIT(0) + +#define SUN50I_DPHY_PLL_REG2 0x10c +#define SUN50I_DPHY_PLL_REG2_SDM_EN BIT(31) +#define SUN50I_DPHY_PLL_REG2_FF_EN BIT(30) +#define SUN50I_DPHY_PLL_REG2_SS_EN BIT(29) +#define SUN50I_DPHY_PLL_REG2_SS_FRAC(n) (((n) & 0x1ff) << 20) +#define SUN50I_DPHY_PLL_REG2_SS_INT(n) (((n) & 0xff) << 12) +#define SUN50I_DPHY_PLL_REG2_FRAC(n) ((n) & 0xfff) + +#define SUN50I_COMBO_PHY_REG0 0x110 +#define 
SUN50I_COMBO_PHY_REG0_EN_TEST_COMBOLDO BIT(5) +#define SUN50I_COMBO_PHY_REG0_EN_TEST_0P8 BIT(4) +#define SUN50I_COMBO_PHY_REG0_EN_MIPI BIT(3) +#define SUN50I_COMBO_PHY_REG0_EN_LVDS BIT(2) +#define SUN50I_COMBO_PHY_REG0_EN_COMBOLDO BIT(1) +#define SUN50I_COMBO_PHY_REG0_EN_CP BIT(0) + +#define SUN50I_COMBO_PHY_REG1 0x114 +#define SUN50I_COMBO_PHY_REG2_REG_VREF1P6(n) (((n) & 0x7) << 4) +#define SUN50I_COMBO_PHY_REG2_REG_VREF0P8(n) ((n) & 0x7) + +#define SUN50I_COMBO_PHY_REG2 0x118 +#define SUN50I_COMBO_PHY_REG2_HS_STOP_DLY(n) ((n) & 0xff) + enum sun6i_dphy_direction { SUN6I_DPHY_DIRECTION_TX, SUN6I_DPHY_DIRECTION_RX, @@ -196,6 +259,76 @@ static void sun6i_a31_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy) udelay(1); } +static void sun50i_a100_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy) +{ + unsigned long mipi_symbol_rate = dphy->config.hs_clk_rate; + unsigned int div, n; + + regmap_write(dphy->regs, SUN6I_DPHY_ANA4_REG, + SUN6I_DPHY_ANA4_REG_IB(2) | + SUN6I_DPHY_ANA4_REG_DMPLVD(4) | + SUN6I_DPHY_ANA4_REG_VTT_SET(3) | + SUN6I_DPHY_ANA4_REG_CKDV(3) | + SUN6I_DPHY_ANA4_REG_TMSD(1) | + SUN6I_DPHY_ANA4_REG_TMSC(1) | + SUN6I_DPHY_ANA4_REG_TXPUSD(2) | + SUN6I_DPHY_ANA4_REG_TXPUSC(3) | + SUN6I_DPHY_ANA4_REG_TXDNSD(2) | + SUN6I_DPHY_ANA4_REG_TXDNSC(3)); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_EN_CK_CPU, + SUN6I_DPHY_ANA2_EN_CK_CPU); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_REG_ENIB, + SUN6I_DPHY_ANA2_REG_ENIB); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA3_REG, + SUN6I_DPHY_ANA3_EN_LDOR | + SUN6I_DPHY_ANA3_EN_LDOC | + SUN6I_DPHY_ANA3_EN_LDOD); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, + SUN6I_DPHY_ANA0_REG_PLR(4) | + SUN6I_DPHY_ANA0_REG_SFB(1)); + + regmap_write(dphy->regs, SUN50I_COMBO_PHY_REG0, + SUN50I_COMBO_PHY_REG0_EN_CP); + + /* Choose a divider to limit the VCO frequency to around 2 GHz. */ + div = 16 >> order_base_2(DIV_ROUND_UP(mipi_symbol_rate, 264000000)); + n = mipi_symbol_rate * div / 24000000; + + regmap_write(dphy->regs, SUN50I_DPHY_PLL_REG0, + SUN50I_DPHY_PLL_REG0_CP36_EN | + SUN50I_DPHY_PLL_REG0_LDO_EN | + SUN50I_DPHY_PLL_REG0_EN_LVS | + SUN50I_DPHY_PLL_REG0_PLL_EN | + SUN50I_DPHY_PLL_REG0_NDET | + SUN50I_DPHY_PLL_REG0_P((div - 1) % 8) | + SUN50I_DPHY_PLL_REG0_N(n) | + SUN50I_DPHY_PLL_REG0_M0((div - 1) / 8) | + SUN50I_DPHY_PLL_REG0_M1(2)); + + /* Disable sigma-delta modulation. 
*/ + regmap_write(dphy->regs, SUN50I_DPHY_PLL_REG2, 0); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA4_REG, + SUN6I_DPHY_ANA4_REG_EN_MIPI, + SUN6I_DPHY_ANA4_REG_EN_MIPI); + + regmap_update_bits(dphy->regs, SUN50I_COMBO_PHY_REG0, + SUN50I_COMBO_PHY_REG0_EN_MIPI | + SUN50I_COMBO_PHY_REG0_EN_COMBOLDO, + SUN50I_COMBO_PHY_REG0_EN_MIPI | + SUN50I_COMBO_PHY_REG0_EN_COMBOLDO); + + regmap_write(dphy->regs, SUN50I_COMBO_PHY_REG2, + SUN50I_COMBO_PHY_REG2_HS_STOP_DLY(20)); + udelay(1); +} + static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy) { u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0); @@ -408,7 +541,7 @@ static const struct regmap_config sun6i_dphy_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, - .max_register = SUN6I_DPHY_DBG5_REG, + .max_register = SUN50I_COMBO_PHY_REG2, .name = "mipi-dphy", }; @@ -484,11 +617,19 @@ static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = { .rx_supported = true, }; +static const struct sun6i_dphy_variant sun50i_a100_mipi_dphy_variant = { + .tx_power_on = sun50i_a100_mipi_dphy_tx_power_on, +}; + static const struct of_device_id sun6i_dphy_of_table[] = { { .compatible = "allwinner,sun6i-a31-mipi-dphy", .data = &sun6i_a31_mipi_dphy_variant, }, + { + .compatible = "allwinner,sun50i-a100-mipi-dphy", + .data = &sun50i_a100_mipi_dphy_variant, + }, { } }; MODULE_DEVICE_TABLE(of, sun6i_dphy_of_table); -- GitLab From b53e19799d45edf7ca7cfd0cf5d6fb2d5179edec Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Thu, 17 Nov 2022 14:11:43 +0800 Subject: [PATCH 441/694] phy: use devm_platform_get_and_ioremap_resource() Convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Minghao Chi Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/202211171411439883050@zte.com.cn Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mmp3-hsic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/phy/marvell/phy-mmp3-hsic.c b/drivers/phy/marvell/phy-mmp3-hsic.c index 7cccf01848d8d..f2537fdcc3ab4 100644 --- a/drivers/phy/marvell/phy-mmp3-hsic.c +++ b/drivers/phy/marvell/phy-mmp3-hsic.c @@ -41,12 +41,10 @@ static int mmp3_hsic_phy_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct phy_provider *provider; - struct resource *resource; void __iomem *base; struct phy *phy; - resource = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(dev, resource); + base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(base)) return PTR_ERR(base); -- GitLab From f12faa3be8e84fa9232a4654bccb30f46bbfee5c Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Tue, 22 Nov 2022 19:24:12 -0800 Subject: [PATCH 442/694] dt-bindings: phy-j721e-wiz: add j721s2 compatible string Add ti,j721s2-wiz-10g compatible string to binding documentation. 
Signed-off-by: Matt Ranostay Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221123032413.1193961-2-mranostay@ti.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml index a9e38739c0100..c54b36c104abe 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml @@ -15,6 +15,7 @@ properties: enum: - ti,j721e-wiz-16g - ti,j721e-wiz-10g + - ti,j721s2-wiz-10g - ti,am64-wiz-10g - ti,j7200-wiz-10g - ti,j784s4-wiz-10g -- GitLab From bea3ce759b4664f20f1f57c53fe018c3b67da147 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Tue, 22 Nov 2022 19:24:13 -0800 Subject: [PATCH 443/694] phy: ti: phy-j721e-wiz: add j721s2-wiz-10g module support Add support for j721s2-wiz-10g device which is similar to j721e-wiz-10g but uses clock-names interface versus explicitly defining clock nodes within device tree node. Signed-off-by: Matt Ranostay Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20221123032413.1193961-3-mranostay@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-j721e-wiz.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index 141b51af4427a..ddce5ef7711c6 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -322,6 +322,7 @@ enum wiz_type { AM64_WIZ_10G, J7200_WIZ_10G, /* J7200 SR2.0 */ J784S4_WIZ_10G, + J721S2_WIZ_10G, }; struct wiz_data { @@ -1000,6 +1001,7 @@ static void wiz_clock_cleanup(struct wiz *wiz, struct device_node *node) case AM64_WIZ_10G: case J7200_WIZ_10G: case J784S4_WIZ_10G: + case J721S2_WIZ_10G: of_clk_del_provider(dev->of_node); return; default: @@ -1132,6 +1134,7 @@ static int wiz_clock_init(struct wiz *wiz, struct device_node *node) case AM64_WIZ_10G: case J7200_WIZ_10G: case J784S4_WIZ_10G: + case J721S2_WIZ_10G: ret = wiz_clock_register(wiz); if (ret) dev_err(dev, "Failed to register wiz clocks\n"); @@ -1214,6 +1217,7 @@ static int wiz_phy_fullrt_div(struct wiz *wiz, int lane) break; case J721E_WIZ_10G: case J7200_WIZ_10G: + case J721S2_WIZ_10G: if (wiz->lane_phy_type[lane] == PHY_TYPE_SGMII) return regmap_field_write(wiz->p0_fullrt_div[lane], 0x2); break; @@ -1318,6 +1322,15 @@ static struct wiz_data j784s4_10g_data = { .clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G, }; +static struct wiz_data j721s2_10g_data = { + .type = J721S2_WIZ_10G, + .pll0_refclk_mux_sel = &pll0_refclk_mux_sel, + .pll1_refclk_mux_sel = &pll1_refclk_mux_sel, + .refclk_dig_sel = &refclk_dig_sel_10g, + .clk_mux_sel = clk_mux_sel_10g, + .clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G, +}; + static const struct of_device_id wiz_id_table[] = { { .compatible = "ti,j721e-wiz-16g", .data = &j721e_16g_data, @@ -1334,6 +1347,9 @@ static const struct of_device_id wiz_id_table[] = { { .compatible = "ti,j784s4-wiz-10g", .data = &j784s4_10g_data, }, + { + .compatible = "ti,j721s2-wiz-10g", .data = &j721s2_10g_data, + }, {} }; MODULE_DEVICE_TABLE(of, wiz_id_table); -- GitLab From 248043299bf61134fb675d16963e11f49e79b05b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Nov 2022 12:12:49 +0200 Subject: [PATCH 444/694] modpost: Mark uuid_le type to be suitable only for MEI The uuid_le type is used only in MEI ABI, do not advertise it for others. 
While at it, comment out that UUID types are not to be used in a new code. Signed-off-by: Andy Shevchenko --- scripts/mod/file2alias.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 7df23905fdf1c..91c2e7ba5e52c 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -34,19 +34,23 @@ typedef Elf64_Addr kernel_ulong_t; typedef uint32_t __u32; typedef uint16_t __u16; typedef unsigned char __u8; + +/* UUID types for backward compatibility, don't use in new code */ typedef struct { __u8 b[16]; } guid_t; -/* backwards compatibility, don't use in new code */ -typedef struct { - __u8 b[16]; -} uuid_le; typedef struct { __u8 b[16]; } uuid_t; + #define UUID_STRING_LEN 36 +/* MEI UUID type, don't use anywhere else */ +typedef struct { + __u8 b[16]; +} uuid_le; + /* Big exception to the "don't include kernel headers into userspace, which * even potentially has different endianness and word sizes, since * we handle those differences explicitly below */ -- GitLab From 6c645b01e536757a9e1a9f72c13767f9b3f8559f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:26 +1000 Subject: [PATCH 445/694] KVM: PPC: Book3E: Fix CONFIG_TRACE_IRQFLAGS support 32-bit does not trace_irqs_off() to match the trace_irqs_on() call in kvmppc_fix_ee_before_entry(). This can lead to irqs being enabled twice in the trace, and the irqs-off region between guest exit and the host enabling local irqs again is not properly traced. 64-bit code does call this, but from asm code where volatiles are live and so incorrectly get clobbered. Move the irq reconcile into C to fix both problems. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-2-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_ppc.h | 12 ++++++++++++ arch/powerpc/kvm/booke.c | 3 +++ arch/powerpc/kvm/bookehv_interrupts.S | 9 --------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bfacf12784dde..eae9619b61903 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -1014,6 +1014,18 @@ static inline void kvmppc_fix_ee_before_entry(void) #endif } +static inline void kvmppc_fix_ee_after_exit(void) +{ +#ifdef CONFIG_PPC64 + /* Only need to enable IRQs by hard enabling them after this */ + local_paca->irq_happened = PACA_IRQ_HARD_DIS; + irq_soft_mask_set(IRQS_ALL_DISABLED); +#endif + + trace_hardirqs_off(); +} + + static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) { ulong ea; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7b4920e9fd263..0dce93ccaadfa 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1015,6 +1015,9 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) u32 last_inst = KVM_INST_FETCH_FAILED; enum emulation_result emulated = EMULATE_DONE; + /* Fix irq state (pairs with kvmppc_fix_ee_before_entry()) */ + kvmppc_fix_ee_after_exit(); + /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 8262c14fc9e63..b5fe6fb53c662 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -424,15 +424,6 @@ _GLOBAL(kvmppc_resume_host) mtspr SPRN_EPCR, r3 isync -#ifdef CONFIG_64BIT - /* - * We enter with 
interrupts disabled in hardware, but - * we need to call RECONCILE_IRQ_STATE to ensure - * that the software state is kept in sync. - */ - RECONCILE_IRQ_STATE(r3,r5) -#endif - /* Switch to kernel stack and jump to handler. */ mr r3, r4 mr r5, r14 /* intno */ -- GitLab From dea681c91d3cd5326f87d0a3c93079573e22ce9a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Nov 2022 08:22:25 +0100 Subject: [PATCH 446/694] powerpc/ps3: mark ps3_system_bus_type static ps3_system_bus_type is only used inside of system-bus.c, so remove the external declaration and the very outdated comment next to it. Signed-off-by: Christoph Hellwig Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221122072225.423432-1-hch@lst.de --- arch/powerpc/include/asm/ps3.h | 4 ---- arch/powerpc/platforms/ps3/system-bus.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index 8a0d8fb353286..d503dbd7856cb 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -425,10 +425,6 @@ static inline void *ps3_system_bus_get_drvdata( return dev_get_drvdata(&dev->core); } -/* These two need global scope for get_arch_dma_ops(). */ - -extern struct bus_type ps3_system_bus_type; - /* system manager */ struct ps3_sys_manager_ops { diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 2502e9b17df4a..38a7e02295c8f 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -466,7 +466,7 @@ static struct attribute *ps3_system_bus_dev_attrs[] = { }; ATTRIBUTE_GROUPS(ps3_system_bus_dev); -struct bus_type ps3_system_bus_type = { +static struct bus_type ps3_system_bus_type = { .name = "ps3_system_bus", .match = ps3_system_bus_match, .uevent = ps3_system_bus_uevent, -- GitLab From 71ae6305ad41cfd1ac5aa91d356e71c7a537df2e Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 22 Nov 2022 12:10:52 +0530 Subject: [PATCH 447/694] selftests/powerpc: Move perror closer to its use Right now, if perf_event_open() fails for the systemwide tests, error report is printed too late, sometimes after subsequent system calls. Move use of perror() to the main function, just after the syscall. Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/372ac78c27899f1f612fbd6ac796604a4a9310aa.1669096083.git.naveen.n.rao@linux.vnet.ibm.com --- .../selftests/powerpc/ptrace/perf-hwbreak.c | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index ecde2c199f3b1..ea5e14ecbf309 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -146,6 +146,7 @@ static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len for (i = 0; i < nprocs; i++) { fd[i] = perf_cpu_event_open(i, type, addr, len); if (fd[i] < 0) { + perror("perf_systemwide_event_open"); close_fds(fd, i); return fd[i]; } @@ -543,15 +544,12 @@ static int test_syswide_multi_diff_addr(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -590,15 +588,12 @@ static int test_syswide_multi_same_addr(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -637,15 +632,12 @@ static int test_syswide_multi_diff_addr_ro_wo(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } @@ -684,15 +676,12 @@ static int test_syswide_multi_same_addr_ro_wo(void) int ret; ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); - if (ret) { - perror("perf_systemwide_event_open"); + if (ret) exit(EXIT_FAILURE); - } ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); if (ret) { close_fds(fd1, nprocs); - perror("perf_systemwide_event_open"); exit(EXIT_FAILURE); } -- GitLab From 616ad3f4aac287c48b66c92cb777395b4465ed4f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 22 Nov 2022 12:10:53 +0530 Subject: [PATCH 448/694] selftests/powerpc: Bump up rlimit for perf-hwbreak test The systemwide perf hardware breakpoint test tries to open a perf event on each cpu. On large systems, we run out of file descriptors and fail the test. Instead, have the test set the file descriptor limit to an arbitraty high value. Reported-by: Rohan Deshpande Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/187fed5843cecc1e5066677b6296ee88337d7bef.1669096083.git.naveen.n.rao@linux.vnet.ibm.com --- .../testing/selftests/powerpc/ptrace/perf-hwbreak.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index ea5e14ecbf309..866e5be48ee33 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -140,8 +141,19 @@ static void disable_fds(int *fd, int n) static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) { + struct rlimit rlim; int i = 0; + if (getrlimit(RLIMIT_NOFILE, &rlim)) { + perror("getrlimit"); + return -1; + } + rlim.rlim_cur = 65536; + if (setrlimit(RLIMIT_NOFILE, &rlim)) { + perror("setrlimit"); + return -1; + } + /* Assume online processors are 0 to nprocs for simplisity */ for (i = 0; i < nprocs; i++) { fd[i] = perf_cpu_event_open(i, type, addr, len); -- GitLab From 260095926d3956071c6699a28824c3f0fa7cd97a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 22 Nov 2022 12:10:54 +0530 Subject: [PATCH 449/694] selftests/powerpc: Account for offline cpus in perf-hwbreak test For systemwide tests, use online cpu mask to only open events on online cpus. This enables this test to work on systems in lower SMT modes. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/15fd447dcefd19945a7d31f0a475349f548a3603.1669096083.git.naveen.n.rao@linux.vnet.ibm.com --- .../selftests/powerpc/ptrace/perf-hwbreak.c | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index 866e5be48ee33..f75739bbad28c 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -17,8 +17,11 @@ * Copyright (C) 2018 Michael Neuling, IBM Corporation. 
*/ +#define _GNU_SOURCE + #include #include +#include #include #include #include @@ -141,8 +144,10 @@ static void disable_fds(int *fd, int n) static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) { + int i, ncpus, cpu, ret = 0; struct rlimit rlim; - int i = 0; + cpu_set_t *mask; + size_t size; if (getrlimit(RLIMIT_NOFILE, &rlim)) { perror("getrlimit"); @@ -154,16 +159,44 @@ static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len return -1; } - /* Assume online processors are 0 to nprocs for simplisity */ - for (i = 0; i < nprocs; i++) { - fd[i] = perf_cpu_event_open(i, type, addr, len); + ncpus = get_nprocs_conf(); + size = CPU_ALLOC_SIZE(ncpus); + mask = CPU_ALLOC(ncpus); + if (!mask) { + perror("malloc"); + return -1; + } + + CPU_ZERO_S(size, mask); + + if (sched_getaffinity(0, size, mask)) { + perror("sched_getaffinity"); + ret = -1; + goto done; + } + + for (i = 0, cpu = 0; i < nprocs && cpu < ncpus; cpu++) { + if (!CPU_ISSET_S(cpu, size, mask)) + continue; + fd[i] = perf_cpu_event_open(cpu, type, addr, len); if (fd[i] < 0) { perror("perf_systemwide_event_open"); close_fds(fd, i); - return fd[i]; + ret = fd[i]; + goto done; } + i++; } - return 0; + + if (i < nprocs) { + printf("Error: Number of online cpus reduced since start of test: %d < %d\n", i, nprocs); + close_fds(fd, i); + ret = -1; + } + +done: + CPU_FREE(mask); + return ret; } static inline bool breakpoint_test(int len) -- GitLab From d5090716be6791ada9ee142163a4934c1c147aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 26 Nov 2022 06:10:00 +0100 Subject: [PATCH 450/694] powerpc/book3e: remove #include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7ad4bd887d27 ("powerpc/book3e: get rid of #include ") removed the usage of the define UTS_RELEASE but forgot to drop the include. utsrelease.h is potentially generated on each build. By removing the unused include we can get rid of some spurious recompilations. Fixes: 7ad4bd887d27 ("powerpc/book3e: get rid of #include ") Signed-off-by: Thomas Weißschuh Reviewed-by: Masahiro Yamada Reviewed-by: Christophe Leroy [mpe: Fix typo in change log and add more explanation] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126051002.123199-2-linux@weissschuh.net --- arch/powerpc/mm/nohash/kaslr_booke.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 0d04f9d5da8d2..2fb3edafe9ab6 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -19,7 +19,6 @@ #include #include #include -#include struct regions { unsigned long pa_start; -- GitLab From 67bbb62f61e810734da0a1577a9802ddaed24140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 30 Sep 2022 14:39:01 +0200 Subject: [PATCH 451/694] powerpc: dts: turris1x.dts: Add channel labels for temperature sensor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Channel 0 of SA56004ED chip refers to internal SA56004ED chip sensor (chip itself is located on the board) and channel 1 of SA56004ED chip refers to external sensor which is connected to temperature diode of the P2020 CPU. 
Fixes: 54c15ec3b738 ("powerpc: dts: Add DTS file for CZ.NIC Turris 1.x routers") Signed-off-by: Pali Rohár Reviewed-by: Marek Behún Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220930123901.10251-1-pali@kernel.org --- arch/powerpc/boot/dts/turris1x.dts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts index 045af668e9284..e9cda34a140e0 100644 --- a/arch/powerpc/boot/dts/turris1x.dts +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -69,6 +69,20 @@ interrupt-parent = <&gpio>; interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */ <13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */ + #address-cells = <1>; + #size-cells = <0>; + + /* Local temperature sensor (SA56004ED internal) */ + channel@0 { + reg = <0>; + label = "board"; + }; + + /* Remote temperature sensor (D+/D- connected to P2020 CPU Temperature Diode) */ + channel@1 { + reg = <1>; + label = "cpu"; + }; }; /* DDR3 SPD/EEPROM */ -- GitLab From e082e99f6f87f5204b2531d5a3db7bbd929d23b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 20 Aug 2022 14:33:27 +0200 Subject: [PATCH 452/694] powerpc/fsl-pci: Choose PCI host bridge with alias pci0 as the primary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there's no PCI host bridge with ISA then check for PCI host bridge with alias "pci0" (first PCI host bridge) and if it exists then choose it as the primary PCI host bridge. This makes choice of primary PCI host bridge more stable across boots and updates as the last fallback candidate for primary PCI host bridge (if there is no choice) is selected arbitrary. Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220820123327.20551-1-pali@kernel.org --- arch/powerpc/sysdev/fsl_pci.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 974d3db6faab2..b7232c46b2448 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1138,6 +1138,19 @@ void __init fsl_pci_assign_primary(void) return; } + /* + * If there's no PCI host bridge with ISA then check for + * PCI host bridge with alias "pci0" (first PCI host bridge). + */ + np = of_find_node_by_path("pci0"); + if (np && of_match_node(pci_ids, np) && of_device_is_available(np)) { + fsl_pci_primary = np; + of_node_put(np); + return; + } + if (np) + of_node_put(np); + /* * If there's no PCI host bridge with ISA, arbitrarily * designate one as primary. This can go away once -- GitLab From 3671f4ebe3eb12e7222e4d7b0f94e85cfe34253a Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 9 Nov 2022 15:51:04 +1100 Subject: [PATCH 453/694] powerpc: Allow clearing and restoring registers independent of saved breakpoint state For the coming temporary mm used for instruction patching, the breakpoint registers need to be cleared to prevent them from accidentally being triggered. As soon as the patching is done, the breakpoints will be restored. The breakpoint state is stored in the per-cpu variable current_brk[]. Add a suspend_breakpoints() function which will clear the breakpoint registers without touching the state in current_brk[]. Add a pair function restore_breakpoints() which will move the state in current_brk[] back to the registers. 
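The intended call pattern around the patching code, as a hedged sketch with a made-up helper (the real caller is the upcoming temporary-mm patching code, which is not part of this change):

#include <asm/debug.h>

static void example_patch_with_breakpoints_quiesced(void (*do_patch)(void))
{
	suspend_breakpoints();	/* hardware cleared, current_brk[] untouched */
	do_patch();		/* e.g. write instructions via the temporary mm */
	restore_breakpoints();	/* reprogram the registers from current_brk[] */
}
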
Signed-off-by: Jordan Niethe Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-2-bgray@linux.ibm.com --- arch/powerpc/include/asm/debug.h | 2 ++ arch/powerpc/kernel/process.c | 38 +++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 86a14736c76c3..51c744608f37a 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -46,6 +46,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk); +void suspend_breakpoints(void); +void restore_breakpoints(void); bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e3e1feaa536a9..5265da2d80340 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -862,10 +862,8 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) return 0; } -void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk) { - memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); - if (dawr_enabled()) // Power8 or later set_dawr(nr, brk); @@ -879,6 +877,12 @@ void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) WARN_ON_ONCE(1); } +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +{ + memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); + set_hw_breakpoint(nr, brk); +} + /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { @@ -891,6 +895,34 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); +/* Disable the breakpoint in hardware without touching current_brk[] */ +void suspend_breakpoints(void) +{ + struct arch_hw_breakpoint brk = {0}; + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, &brk); +} + +/* + * Re-enable breakpoints suspended by suspend_breakpoints() in hardware + * from current_brk[] + */ +void restore_breakpoints(void) +{ + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, this_cpu_ptr(¤t_brk[i])); +} + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline bool tm_enabled(struct task_struct *tsk) -- GitLab From 071c95c1acbd96e76bab8b25b5cad0d71a011f37 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:05 +1100 Subject: [PATCH 454/694] powerpc/code-patching: Use WARN_ON and fix check in poking_init BUG_ON() when failing to initialise the code patching window is unnecessary, and use of BUG_ON is discouraged. We don't set poking_init_done in this case, so failure to init the boot CPU will result in a strict RWX error when a following patch_instruction uses raw_patch_instruction. If it only fails for later CPUs, they won't be onlined in the first place. The return value of cpuhp_setup_state() is also >= 0 on success, so check for < 0. 
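The check relies on the cpuhp return convention: with CPUHP_AP_ONLINE_DYN, cpuhp_setup_state() returns the dynamically allocated state number (>= 0) on success, so only negative values are errors and a bare "if (ret)" would treat success as failure. A hedged, minimal illustration with made-up names:

#include <linux/cpuhotplug.h>

static int example_cpu_online(unsigned int cpu)
{
	return 0;
}

static int example_register_cpuhp(void)
{
	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example/online",
				    example_cpu_online, NULL);

	if (ret < 0)	/* not "if (ret)": success returns the state number */
		return ret;

	return 0;
}
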
Signed-off-by: Benjamin Gray Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-3-bgray@linux.ibm.com --- arch/powerpc/lib/code-patching.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index ad0cf3108dd09..3055eef7dcdcc 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -81,16 +81,17 @@ static int text_area_cpu_down(unsigned int cpu) static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); -/* - * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and - * we judge it as being preferable to a kernel that will crash later when - * someone tries to use patch_instruction(). - */ void __init poking_init(void) { - BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "powerpc/text_poke:online", text_area_cpu_up, - text_area_cpu_down)); + int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down); + + /* cpuhp_setup_state returns >= 0 on success */ + if (WARN_ON(ret < 0)) + return; + static_branch_enable(&poking_init_done); } -- GitLab From baf1ed24b27db475b38f534953885d0425e2232d Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:07 +1100 Subject: [PATCH 455/694] powerpc/mm: Remove empty hash__ functions The empty hash__* functions are unnecessary. The empty definitions were introduced when 64-bit Hash support was added, as the functions were still used in generic code. These empty definitions were prefixed with hash__ when Radix support was added, and new wrappers with the original names were added that selected the Radix or Hash version based on radix_enabled(). But the hash__ prefixed functions were not part of a public interface, so there is no need to include them for compatibility with anything. Generic code will use the non-prefixed wrappers, and Hash specific code will know that there is no point in calling them (or even worse, call them and expect them to do something). 
Signed-off-by: Benjamin Gray Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-5-bgray@linux.ibm.com --- .../include/asm/book3s/64/tlbflush-hash.h | 28 ------------------- arch/powerpc/include/asm/book3s/64/tlbflush.h | 27 ++++++------------ 2 files changed, 9 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 751921f6db461..a9ef40dc263e0 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -65,13 +65,6 @@ extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, unsigned long flags); -static inline void hash__local_flush_tlb_mm(struct mm_struct *mm) -{ -} - -static inline void hash__flush_tlb_mm(struct mm_struct *mm) -{ -} static inline void hash__local_flush_all_mm(struct mm_struct *mm) { @@ -95,27 +88,6 @@ static inline void hash__flush_all_mm(struct mm_struct *mm) WARN_ON_ONCE(1); } -static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void hash__flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void hash__flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline void hash__flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ -} - - struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 67655cd605456..2254a40f0564b 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -47,8 +47,7 @@ static inline void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_pmd_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_pmd_tlb_range(vma, start, end); } #define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE @@ -57,39 +56,34 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long end) { if (radix_enabled()) - return radix__flush_hugetlb_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_hugetlb_tlb_range(vma, start, end); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_tlb_range(vma, start, end); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_tlb_kernel_range(start, end); - return hash__flush_tlb_kernel_range(start, end); + radix__flush_tlb_kernel_range(start, end); } static inline void local_flush_tlb_mm(struct mm_struct *mm) { if (radix_enabled()) - return radix__local_flush_tlb_mm(mm); - return hash__local_flush_tlb_mm(mm); + radix__local_flush_tlb_mm(mm); } static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { if (radix_enabled()) - return radix__local_flush_tlb_page(vma, vmaddr); - return hash__local_flush_tlb_page(vma, vmaddr); + radix__local_flush_tlb_page(vma, vmaddr); } static inline void 
local_flush_all_mm(struct mm_struct *mm) @@ -102,24 +96,21 @@ static inline void local_flush_all_mm(struct mm_struct *mm) static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) - return radix__tlb_flush(tlb); - return hash__tlb_flush(tlb); + radix__tlb_flush(tlb); } #ifdef CONFIG_SMP static inline void flush_tlb_mm(struct mm_struct *mm) { if (radix_enabled()) - return radix__flush_tlb_mm(mm); - return hash__flush_tlb_mm(mm); + radix__flush_tlb_mm(mm); } static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { if (radix_enabled()) - return radix__flush_tlb_page(vma, vmaddr); - return hash__flush_tlb_page(vma, vmaddr); + radix__flush_tlb_page(vma, vmaddr); } static inline void flush_all_mm(struct mm_struct *mm) -- GitLab From 0f0a0a6091e678b1a75078ecd6b02176f3228dbb Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:08 +1100 Subject: [PATCH 456/694] cxl: Use radix__flush_all_mm instead of generic flush_all_mm The generic implementation of this function isn't really generic (Hash is not implemented). Unfortunately, the runtime warnings cannot be replaced with BUILD_BUG's, so it seems safer not to provide a stub in the first place. Signed-off-by: Benjamin Gray Reviewed-by: Andrew Donnellan Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-6-bgray@linux.ibm.com --- arch/powerpc/include/asm/mmu_context.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index c1ea270bb8486..57f5017111f47 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -151,8 +151,8 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * nMMU and/or PSL need to be cleaned up. * * Both the 'copros' and 'active_cpus' counts are looked at in - * flush_all_mm() to determine the scope (local/global) of the - * TLBIs, so we need to flush first before decrementing + * radix__flush_all_mm() to determine the scope (local/global) + * of the TLBIs, so we need to flush first before decrementing * 'copros'. If this API is used by several callers for the * same context, it can lead to over-flushing. It's hopefully * not common enough to be a problem. @@ -164,7 +164,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * in-between. */ if (radix_enabled()) { - flush_all_mm(mm); + radix__flush_all_mm(mm); c = atomic_dec_if_positive(&mm->context.copros); /* Detect imbalance between add and remove */ -- GitLab From d34471c9bd5d47ab148dd68817631a4238f755c4 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:09 +1100 Subject: [PATCH 457/694] powerpc/mm: Remove flush_all_mm, local_flush_all_mm These functions were introduced for "cxl: Enable global TLBIs for cxl contexts" [1], which ended up using them for Radix only. They were never implemented on Hash (and creating an implementation appears to be difficult), so nothing can actually rely on them. They behave differently to the existing surrounding functions too, in that they actually need to do something on Hash. The other functions are primarily for use in generic code that expects their definitions, but Hash updates the TLB during PTE updates. After replacing the only usage with the Radix specific version, there are no more users of these functions, and given they are not implemented anyway it is safe to delete them. 
[1]: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20170903181513.29635-1-fbarrat@linux.vnet.ibm.com/ Signed-off-by: Benjamin Gray Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-7-bgray@linux.ibm.com --- .../include/asm/book3s/64/tlbflush-hash.h | 22 ------------------- arch/powerpc/include/asm/book3s/64/tlbflush.h | 15 ------------- 2 files changed, 37 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index a9ef40dc263e0..146287d9580f4 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -66,28 +66,6 @@ extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, unsigned long flags); -static inline void hash__local_flush_all_mm(struct mm_struct *mm) -{ - /* - * There's no Page Walk Cache for hash, so what is needed is - * the same as flush_tlb_mm(), which doesn't really make sense - * with hash. So the only thing we could do is flush the - * entire LPID! Punt for now, as it's not being used. - */ - WARN_ON_ONCE(1); -} - -static inline void hash__flush_all_mm(struct mm_struct *mm) -{ - /* - * There's no Page Walk Cache for hash, so what is needed is - * the same as flush_tlb_mm(), which doesn't really make sense - * with hash. So the only thing we could do is flush the - * entire LPID! Punt for now, as it's not being used. - */ - WARN_ON_ONCE(1); -} - struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 2254a40f0564b..c56a0aee8124b 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -86,13 +86,6 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, radix__local_flush_tlb_page(vma, vmaddr); } -static inline void local_flush_all_mm(struct mm_struct *mm) -{ - if (radix_enabled()) - return radix__local_flush_all_mm(mm); - return hash__local_flush_all_mm(mm); -} - static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) @@ -112,17 +105,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, if (radix_enabled()) radix__flush_tlb_page(vma, vmaddr); } - -static inline void flush_all_mm(struct mm_struct *mm) -{ - if (radix_enabled()) - return radix__flush_all_mm(mm); - return hash__flush_all_mm(mm); -} #else #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) -#define flush_all_mm(mm) local_flush_all_mm(mm) #endif /* CONFIG_SMP */ #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault -- GitLab From 274d842fa1efd9449e62222c8896e0be11621f1f Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:10 +1100 Subject: [PATCH 458/694] powerpc/tlb: Add local flush for page given mm_struct and psize Adds a local TLB flush operation that works given an mm_struct, VA to flush, and page size representation. Most implementations mirror the surrounding code. The book3s/32/tlbflush.h implementation is left as a BUILD_BUG because it is more complicated and not required for anything as yet. This removes the need to create a vm_area_struct, which the temporary patching mm work does not need. 
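A hedged sketch of the intended call site (the code-patching user arrives in a later patch; patching_mm and patching_addr below are hypothetical names): the caller passes the mm, the virtual address and a page size index directly, with no vm_area_struct to construct.

  /* Hypothetical caller; patching_mm and patching_addr are placeholders. */
  local_flush_tlb_page_psize(patching_mm, patching_addr, mmu_virtual_psize);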
Signed-off-by: Benjamin Gray Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-8-bgray@linux.ibm.com --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 9 +++++++++ arch/powerpc/include/asm/book3s/64/tlbflush.h | 7 +++++++ arch/powerpc/include/asm/nohash/tlbflush.h | 7 +++++++ arch/powerpc/mm/nohash/tlb.c | 8 ++++++++ 4 files changed, 31 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index ba1743c52b56d..4be5729081243 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H #define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H +#include + #define MMU_NO_CONTEXT (0) /* * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx @@ -74,6 +76,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, { flush_tlb_page(vma, vmaddr); } + +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + BUILD_BUG(); +} + static inline void local_flush_tlb_mm(struct mm_struct *mm) { flush_tlb_mm(mm); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index c56a0aee8124b..dd39313242b4d 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -86,6 +86,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, radix__local_flush_tlb_page(vma, vmaddr); } +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + if (radix_enabled()) + radix__local_flush_tlb_page_psize(mm, vmaddr, psize); +} + static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index bdaf34ad41ea6..9a2cf83ea4f19 100644 --- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -45,6 +45,12 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); } +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); +} + static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { start &= PAGE_MASK; @@ -58,6 +64,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void local_flush_tlb_mm(struct mm_struct *mm); extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, int tsize, int ind); diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 2c15c86c70157..a903b308acc54 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -184,6 +184,14 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) mmu_get_tsize(mmu_virtual_psize), 0); } EXPORT_SYMBOL(local_flush_tlb_page); + +void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + __local_flush_tlb_page(mm, vmaddr, 
mmu_get_tsize(psize), 0); +} +EXPORT_SYMBOL(local_flush_tlb_page_psize); + #endif /* -- GitLab From 32975c491ee410598b33201344c123fcc81a7c33 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 23 Nov 2022 17:39:18 +0100 Subject: [PATCH 459/694] uapi: Add missing _UAPI prefix to include guard Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/asm-generic/types.h b/include/uapi/asm-generic/types.h index dfaa50d99d8f3..7ad4dd01b8bf0 100644 --- a/include/uapi/asm-generic/types.h +++ b/include/uapi/asm-generic/types.h @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_GENERIC_TYPES_H -#define _ASM_GENERIC_TYPES_H +#ifndef _UAPI_ASM_GENERIC_TYPES_H +#define _UAPI_ASM_GENERIC_TYPES_H /* * int-ll64 is used everywhere now. */ #include -#endif /* _ASM_GENERIC_TYPES_H */ +#endif /* _UAPI_ASM_GENERIC_TYPES_H */ -- GitLab From 9f61521c7a284e799050cd2adacc9a611bd2b491 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 13:11:13 +1000 Subject: [PATCH 460/694] powerpc/qspinlock: powerpc qspinlock implementation Add a powerpc specific implementation of queued spinlocks. This is the build framework with a very simple (non-queued) spinlock implementation to begin with. Later changes add queueing, and other features and optimisations one-at-a-time. It is done this way to more easily see how the queued spinlocks are built, and to make performance and correctness bisects more useful. Signed-off-by: Nicholas Piggin [mpe: Drop paravirt.h & processor.h changes to fix 32-bit build] [mpe: Fix 32-bit build of qspinlock.o & disallow GENERIC_LOCKBREAK per Nick] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/CONLLQB6DCJU.2ZPOS7T6S5GRR@bobo --- arch/powerpc/Kconfig | 3 +- arch/powerpc/include/asm/qspinlock.h | 86 +++++++------------ arch/powerpc/include/asm/qspinlock_paravirt.h | 7 -- arch/powerpc/include/asm/qspinlock_types.h | 13 +++ arch/powerpc/include/asm/spinlock.h | 2 +- arch/powerpc/include/asm/spinlock_types.h | 2 +- arch/powerpc/lib/Makefile | 4 +- arch/powerpc/lib/qspinlock.c | 17 ++++ 8 files changed, 65 insertions(+), 69 deletions(-) delete mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h create mode 100644 arch/powerpc/include/asm/qspinlock_types.h create mode 100644 arch/powerpc/lib/qspinlock.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 699df27b0e2fc..7fbdf22ce9a97 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -96,7 +96,7 @@ config LOCKDEP_SUPPORT config GENERIC_LOCKBREAK bool default y - depends on SMP && PREEMPTION + depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS config GENERIC_HWEIGHT bool @@ -154,7 +154,6 @@ config PPC select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS - select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IRQS_OFF_ACTIVATE_MM diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index b676c4fb90fd7..5e6257313557d 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -2,83 +2,55 @@ #ifndef _ASM_POWERPC_QSPINLOCK_H #define _ASM_POWERPC_QSPINLOCK_H -#include +#include +#include +#include #include -#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ - -#ifdef 
CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_queued_spin_unlock(struct qspinlock *lock); - -static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { - if (!is_shared_processor()) - native_queued_spin_lock_slowpath(lock, val); - else - __pv_queued_spin_lock_slowpath(lock, val); + return atomic_read(&lock->val); } -#define queued_spin_unlock queued_spin_unlock -static inline void queued_spin_unlock(struct qspinlock *lock) +static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) { - if (!is_shared_processor()) - smp_store_release(&lock->locked, 0); - else - __pv_queued_spin_unlock(lock); + return !atomic_read(&lock.val); } -#else -extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -#endif - -static __always_inline void queued_spin_lock(struct qspinlock *lock) +static __always_inline int queued_spin_is_contended(struct qspinlock *lock) { - u32 val = 0; - - if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) - return; - - queued_spin_lock_slowpath(lock, val); + return 0; } -#define queued_spin_lock queued_spin_lock -#ifdef CONFIG_PARAVIRT_SPINLOCKS -#define SPIN_THRESHOLD (1<<15) /* not tuned */ - -static __always_inline void pv_wait(u8 *ptr, u8 val) +static __always_inline int queued_spin_trylock(struct qspinlock *lock) { - if (*ptr != val) - return; - yield_to_any(); - /* - * We could pass in a CPU here if waiting in the queue and yield to - * the previous CPU in the queue. - */ + return atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0; } -static __always_inline void pv_kick(int cpu) +void queued_spin_lock_slowpath(struct qspinlock *lock); + +static __always_inline void queued_spin_lock(struct qspinlock *lock) { - prod_cpu(cpu); + if (!queued_spin_trylock(lock)) + queued_spin_lock_slowpath(lock); } -extern void __pv_init_lock_hash(void); - -static inline void pv_spinlocks_init(void) +static inline void queued_spin_unlock(struct qspinlock *lock) { - __pv_init_lock_hash(); + atomic_set_release(&lock->val, 0); } -#endif - -/* - * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait, - * which was found to have performance problems if implemented with - * the preferred spin_begin()/spin_end() SMT priority pattern. Use the - * generic version instead. 
- */ +#define arch_spin_is_locked(l) queued_spin_is_locked(l) +#define arch_spin_is_contended(l) queued_spin_is_contended(l) +#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l) +#define arch_spin_lock(l) queued_spin_lock(l) +#define arch_spin_trylock(l) queued_spin_trylock(l) +#define arch_spin_unlock(l) queued_spin_unlock(l) -#include +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void); +#else +static inline void pv_spinlocks_init(void) { } +#endif #endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h deleted file mode 100644 index 6b60e7736a47c..0000000000000 --- a/arch/powerpc/include/asm/qspinlock_paravirt.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H -#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H - -EXPORT_SYMBOL(__pv_queued_spin_unlock); - -#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h new file mode 100644 index 0000000000000..59606bc0c774e --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H +#define _ASM_POWERPC_QSPINLOCK_TYPES_H + +#include + +typedef struct qspinlock { + atomic_t val; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } + +#endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index bd75872a6334a..7dafca8e3f02e 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -13,7 +13,7 @@ /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() -#ifndef CONFIG_PARAVIRT_SPINLOCKS +#ifndef CONFIG_PPC_QUEUED_SPINLOCKS static inline void pv_spinlocks_init(void) { } #endif diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index d5f8a74ed2e8c..40b01446cf755 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ #endif #ifdef CONFIG_PPC_QUEUED_SPINLOCKS -#include +#include #include #else #include diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 8560c912186df..4de71cbf6e8ee 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -52,7 +52,9 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ memcpy_64.o copy_mc_64.o -ifndef CONFIG_PPC_QUEUED_SPINLOCKS +ifdef CONFIG_PPC_QUEUED_SPINLOCKS +obj-$(CONFIG_SMP) += qspinlock.o +else obj64-$(CONFIG_SMP) += locks.o endif diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c new file mode 100644 index 0000000000000..1c669b5b4607e --- /dev/null +++ b/arch/powerpc/lib/qspinlock.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include + +void queued_spin_lock_slowpath(struct qspinlock *lock) +{ + while (!queued_spin_trylock(lock)) + cpu_relax(); +} +EXPORT_SYMBOL(queued_spin_lock_slowpath); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void) +{ +} +#endif -- GitLab From 84990b169557428c318df87b7836cd15f65b62dc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:16 +1000 Subject: [PATCH 461/694] powerpc/qspinlock: add mcs queueing for contended waiters 
This forms the basis of the qspinlock slow path. Like generic qspinlocks and unlike the vanilla MCS algorithm, the lock owner does not participate in the queue, only waiters. The first waiter spins on the lock word, then when the lock is released it takes ownership and unqueues the next waiter. This is how qspinlocks can be implemented with the spinlock API -- lock owners don't need a node, only waiters do. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-2-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 10 +- arch/powerpc/include/asm/qspinlock_types.h | 23 +++ arch/powerpc/lib/qspinlock.c | 187 ++++++++++++++++++++- 3 files changed, 214 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 5e6257313557d..6946dba5d0878 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -19,12 +19,12 @@ static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) static __always_inline int queued_spin_is_contended(struct qspinlock *lock) { - return 0; + return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK); } static __always_inline int queued_spin_trylock(struct qspinlock *lock) { - return atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0; + return atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0; } void queued_spin_lock_slowpath(struct qspinlock *lock); @@ -37,7 +37,11 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static inline void queued_spin_unlock(struct qspinlock *lock) { - atomic_set_release(&lock->val, 0); + for (;;) { + int val = atomic_read(&lock->val); + if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) + return; + } } #define arch_spin_is_locked(l) queued_spin_is_locked(l) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 59606bc0c774e..20a36dfb14e26 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -10,4 +10,27 @@ typedef struct qspinlock { #define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } +/* + * Bitfields in the lock word: + * + * 0: locked bit + * 1-16: unused bits + * 17-31: tail cpu (+1) + */ +#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ + << _Q_ ## type ## _OFFSET) +/* 0x00000001 */ +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_BITS 1 +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) + +/* 0xfffe0000 */ +#define _Q_TAIL_CPU_OFFSET 17 +#define _Q_TAIL_CPU_BITS 15 +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) + +#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS) +#error "qspinlock does not support such large CONFIG_NR_CPUS" +#endif + #endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */ diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 1c669b5b4607e..86504628501e4 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -1,12 +1,193 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include #include -#include +#include +#include #include -void queued_spin_lock_slowpath(struct qspinlock *lock) +#define MAX_NODES 4 + +struct qnode { + struct qnode *next; + struct qspinlock *lock; + u8 locked; /* 1 if lock acquired */ +}; + +struct qnodes { + int count; + struct qnode nodes[MAX_NODES]; +}; + +static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); + +static inline int encode_tail_cpu(int cpu) +{ + return (cpu + 1) << 
_Q_TAIL_CPU_OFFSET; +} + +static inline int decode_tail_cpu(int val) +{ + return (val >> _Q_TAIL_CPU_OFFSET) - 1; +} + +/* + * Try to acquire the lock if it was not already locked. If the tail matches + * mytail then clear it, otherwise leave it unchnaged. Return previous value. + * + * This is used by the head of the queue to acquire the lock and clean up + * its tail if it was the last one queued. + */ +static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tail) +{ + int val = atomic_read(&lock->val); + + BUG_ON(val & _Q_LOCKED_VAL); + + /* If we're the last queued, must clean up the tail. */ + if ((val & _Q_TAIL_CPU_MASK) == tail) { + if (atomic_cmpxchg_acquire(&lock->val, val, _Q_LOCKED_VAL) == val) + return val; + /* Another waiter must have enqueued */ + val = atomic_read(&lock->val); + BUG_ON(val & _Q_LOCKED_VAL); + } + + /* We must be the owner, just set the lock bit and acquire */ + atomic_or(_Q_LOCKED_VAL, &lock->val); + __atomic_acquire_fence(); + + return val; +} + +/* + * Publish our tail, replacing previous tail. Return previous value. + * + * This provides a release barrier for publishing node, this pairs with the + * acquire barrier in get_tail_qnode() when the next CPU finds this tail + * value. + */ +static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail) +{ + for (;;) { + int val = atomic_read(&lock->val); + int newval = (val & ~_Q_TAIL_CPU_MASK) | tail; + int old; + + old = atomic_cmpxchg_release(&lock->val, val, newval); + if (old == val) + return old; + } +} + +static struct qnode *get_tail_qnode(struct qspinlock *lock, int val) +{ + int cpu = decode_tail_cpu(val); + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu); + int idx; + + /* + * After publishing the new tail and finding a previous tail in the + * previous val (which is the control dependency), this barrier + * orders the release barrier in publish_tail_cpu performed by the + * last CPU, with subsequently looking at its qnode structures + * after the barrier. + */ + smp_acquire__after_ctrl_dep(); + + for (idx = 0; idx < MAX_NODES; idx++) { + struct qnode *qnode = &qnodesp->nodes[idx]; + if (qnode->lock == lock) + return qnode; + } + + BUG(); +} + +static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) { - while (!queued_spin_trylock(lock)) + struct qnodes *qnodesp; + struct qnode *next, *node; + int val, old, tail; + int idx; + + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + + qnodesp = this_cpu_ptr(&qnodes); + if (unlikely(qnodesp->count >= MAX_NODES)) { + while (!queued_spin_trylock(lock)) + cpu_relax(); + return; + } + + idx = qnodesp->count++; + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. + */ + barrier(); + node = &qnodesp->nodes[idx]; + node->next = NULL; + node->lock = lock; + node->locked = 0; + + tail = encode_tail_cpu(smp_processor_id()); + + old = publish_tail_cpu(lock, tail); + + /* + * If there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_CPU_MASK) { + struct qnode *prev = get_tail_qnode(lock, old); + + /* Link @node into the waitqueue. */ + WRITE_ONCE(prev->next, node); + + /* Wait for mcs node lock to be released */ + while (!node->locked) + cpu_relax(); + + smp_rmb(); /* acquire barrier for the mcs lock */ + } + + /* We're at the head of the waitqueue, wait for the lock. 
*/ + for (;;) { + val = atomic_read(&lock->val); + if (!(val & _Q_LOCKED_VAL)) + break; + + cpu_relax(); + } + + /* If we're the last queued, must clean up the tail. */ + old = set_locked_clean_tail(lock, tail); + if ((old & _Q_TAIL_CPU_MASK) == tail) + goto release; /* Another waiter must have enqueued */ + + /* There is a next, must wait for node->next != NULL (MCS protocol) */ + while (!(next = READ_ONCE(node->next))) cpu_relax(); + + /* + * Unlock the next mcs waiter node. Release barrier is not required + * here because the acquirer is only accessing the lock word, and + * the acquire barrier we took the lock with orders that update vs + * this store to locked. The corresponding barrier is the smp_rmb() + * acquire barrier for mcs lock, above. + */ + WRITE_ONCE(next->locked, 1); + +release: + qnodesp->count--; /* release the node */ +} + +void queued_spin_lock_slowpath(struct qspinlock *lock) +{ + queued_spin_lock_mcs_queue(lock); } EXPORT_SYMBOL(queued_spin_lock_slowpath); -- GitLab From 4c93c2e4b9e8988511c06b9c042f23d4b8f593ad Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:17 +1000 Subject: [PATCH 462/694] powerpc/qspinlock: use a half-word store to unlock to avoid larx/stcx. The first 16 bits of the lock are only modified by the owner, and other modifications always use atomic operations on the entire 32 bits, so unlocks can use plain stores on the 16 bits. This is the same kind of optimisation done by core qspinlock code. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-3-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 6 +----- arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 6946dba5d0878..713f6629f6fbc 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -37,11 +37,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static inline void queued_spin_unlock(struct qspinlock *lock) { - for (;;) { - int val = atomic_read(&lock->val); - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val) - return; - } + smp_store_release(&lock->locked, 0); } #define arch_spin_is_locked(l) queued_spin_is_locked(l) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 20a36dfb14e26..fe87181c59e51 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -3,12 +3,27 @@ #define _ASM_POWERPC_QSPINLOCK_TYPES_H #include +#include typedef struct qspinlock { - atomic_t val; + union { + atomic_t val; + +#ifdef __LITTLE_ENDIAN + struct { + u16 locked; + u8 reserved[2]; + }; +#else + struct { + u8 reserved[2]; + u16 locked; + }; +#endif + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } /* * Bitfields in the lock word: -- GitLab From b3a73b7db2b6cb3b2e5bfda5518a0e92230ef673 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:18 +1000 Subject: [PATCH 463/694] powerpc/qspinlock: convert atomic operations to assembly This uses more optimal ll/sc style access patterns (rather than cmpxchg), and also sets the EH=1 lock hint on those operations which acquire ownership of the lock. 
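For readability, the C-level operation the new trylock assembly implements is roughly the sketch below (illustrative only; the asm form can additionally pass the EH=1 hint to lwarx on 64-bit, which a plain cmpxchg cannot express):

  static __always_inline int queued_spin_trylock_sketch(struct qspinlock *lock)
  {
          /* Atomically replace an unlocked (0) word with the locked value. */
          return cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0;
  }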
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-4-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 24 +++++-- arch/powerpc/include/asm/qspinlock_types.h | 4 +- arch/powerpc/lib/qspinlock.c | 82 +++++++++++++--------- 3 files changed, 68 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 713f6629f6fbc..c16e1f0674b5a 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -2,29 +2,43 @@ #ifndef _ASM_POWERPC_QSPINLOCK_H #define _ASM_POWERPC_QSPINLOCK_H -#include #include #include #include static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { - return atomic_read(&lock->val); + return READ_ONCE(lock->val); } static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) { - return !atomic_read(&lock.val); + return !lock.val; } static __always_inline int queued_spin_is_contended(struct qspinlock *lock) { - return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK); + return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); } static __always_inline int queued_spin_trylock(struct qspinlock *lock) { - return atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0; + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1,%3 # queued_spin_trylock \n" +" cmpwi 0,%0,0 \n" +" bne- 2f \n" +" stwcx. %2,0,%1 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_LOCKED_VAL), + "i" (IS_ENABLED(CONFIG_PPC64)) + : "cr0", "memory"); + + return likely(prev == 0); } void queued_spin_lock_slowpath(struct qspinlock *lock); diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index fe87181c59e51..b9a5a52fa6703 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -7,7 +7,7 @@ typedef struct qspinlock { union { - atomic_t val; + u32 val; #ifdef __LITTLE_ENDIAN struct { @@ -23,7 +23,7 @@ typedef struct qspinlock { }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = 0 } } /* * Bitfields in the lock word: diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 86504628501e4..645d9affacfdb 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include #include #include #include @@ -22,12 +21,12 @@ struct qnodes { static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); -static inline int encode_tail_cpu(int cpu) +static inline u32 encode_tail_cpu(int cpu) { return (cpu + 1) << _Q_TAIL_CPU_OFFSET; } -static inline int decode_tail_cpu(int val) +static inline int decode_tail_cpu(u32 val) { return (val >> _Q_TAIL_CPU_OFFSET) - 1; } @@ -39,26 +38,34 @@ static inline int decode_tail_cpu(int val) * This is used by the head of the queue to acquire the lock and clean up * its tail if it was the last one queued. */ -static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tail) +static __always_inline u32 set_locked_clean_tail(struct qspinlock *lock, u32 tail) { - int val = atomic_read(&lock->val); - - BUG_ON(val & _Q_LOCKED_VAL); - - /* If we're the last queued, must clean up the tail. 
*/ - if ((val & _Q_TAIL_CPU_MASK) == tail) { - if (atomic_cmpxchg_acquire(&lock->val, val, _Q_LOCKED_VAL) == val) - return val; - /* Another waiter must have enqueued */ - val = atomic_read(&lock->val); - BUG_ON(val & _Q_LOCKED_VAL); - } - - /* We must be the owner, just set the lock bit and acquire */ - atomic_or(_Q_LOCKED_VAL, &lock->val); - __atomic_acquire_fence(); - - return val; + u32 newval = _Q_LOCKED_VAL; + u32 prev, tmp; + + asm volatile( +"1: lwarx %0,0,%2,%6 # set_locked_clean_tail \n" + /* Test whether the lock tail == tail */ +" and %1,%0,%5 \n" +" cmpw 0,%1,%3 \n" + /* Merge the new locked value */ +" or %1,%1,%4 \n" +" bne 2f \n" + /* If the lock tail matched, then clear it, otherwise leave it. */ +" andc %1,%1,%5 \n" +"2: stwcx. %1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"3: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r"(tail), "r" (newval), + "r" (_Q_TAIL_CPU_MASK), + "i" (IS_ENABLED(CONFIG_PPC64)) + : "cr0", "memory"); + + BUG_ON(prev & _Q_LOCKED_VAL); + + return prev; } /* @@ -68,20 +75,25 @@ static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tai * acquire barrier in get_tail_qnode() when the next CPU finds this tail * value. */ -static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail) +static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) { - for (;;) { - int val = atomic_read(&lock->val); - int newval = (val & ~_Q_TAIL_CPU_MASK) | tail; - int old; - - old = atomic_cmpxchg_release(&lock->val, val, newval); - if (old == val) - return old; - } + u32 prev, tmp; + + asm volatile( +"\t" PPC_RELEASE_BARRIER " \n" +"1: lwarx %0,0,%2 # publish_tail_cpu \n" +" andc %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. %1,0,%2 \n" +" bne- 1b \n" + : "=&r" (prev), "=&r"(tmp) + : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK) + : "cr0", "memory"); + + return prev; } -static struct qnode *get_tail_qnode(struct qspinlock *lock, int val) +static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = decode_tail_cpu(val); struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu); @@ -109,7 +121,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) { struct qnodes *qnodesp; struct qnode *next, *node; - int val, old, tail; + u32 val, old, tail; int idx; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -156,7 +168,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) /* We're at the head of the waitqueue, wait for the lock. */ for (;;) { - val = atomic_read(&lock->val); + val = READ_ONCE(lock->val); if (!(val & _Q_LOCKED_VAL)) break; -- GitLab From 6aa42f883c438ea132a28801bef3f86f3883d14c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:19 +1000 Subject: [PATCH 464/694] powerpc/qspinlock: allow new waiters to steal the lock before queueing Allow new waiters to "steal" the lock before queueing. That is, to acquire it while other CPUs have queued. This particularly helps paravirt performance when physical CPUs are oversubscribed, by keeping the lock from becoming a strict FIFO and vCPU preemption causing queue train wrecks. The new __queued_spin_trylock_steal() function is put in qspinlock.h to save having to move it, because it will be used there by a later change. 
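The resulting slow path ordering can be summarised by the sketch below (same structure as the diff that follows): a bounded stealing phase first, tunable at runtime via the qspl_steal_spins debugfs file, then the MCS queue.

  void queued_spin_lock_slowpath_sketch(struct qspinlock *lock)
  {
          /* Stealing is bounded by the steal_spins tunable. */
          if (try_to_steal_lock(lock))
                  return;

          queued_spin_lock_mcs_queue(lock);
  }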
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-5-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 23 ++++++ arch/powerpc/lib/qspinlock.c | 110 ++++++++++++++++++++++++--- 2 files changed, 124 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index c16e1f0674b5a..cebd2c89c08d4 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -41,6 +41,29 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock) return likely(prev == 0); } +static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) +{ + u32 prev, tmp; + + /* Trylock may get ahead of queued nodes if it finds unlocked */ + asm volatile( +"1: lwarx %0,0,%2,%5 # __queued_spin_trylock_steal \n" +" andc. %1,%0,%4 \n" +" bne- 2f \n" +" and %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. %1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK), + "i" (IS_ENABLED(CONFIG_PPC64)) + : "cr0", "memory"); + + return likely(!(prev & ~_Q_TAIL_CPU_MASK)); +} + void queued_spin_lock_slowpath(struct qspinlock *lock); static __always_inline void queued_spin_lock(struct qspinlock *lock) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 645d9affacfdb..6ffd3261064c2 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -19,8 +19,17 @@ struct qnodes { struct qnode nodes[MAX_NODES]; }; +/* Tuning parameters */ +static int steal_spins __read_mostly = (1 << 5); +static bool maybe_stealers __read_mostly = true; + static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static __always_inline int get_steal_spins(void) +{ + return steal_spins; +} + static inline u32 encode_tail_cpu(int cpu) { return (cpu + 1) << _Q_TAIL_CPU_OFFSET; @@ -38,33 +47,35 @@ static inline int decode_tail_cpu(u32 val) * This is used by the head of the queue to acquire the lock and clean up * its tail if it was the last one queued. */ -static __always_inline u32 set_locked_clean_tail(struct qspinlock *lock, u32 tail) +static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) { u32 newval = _Q_LOCKED_VAL; u32 prev, tmp; asm volatile( -"1: lwarx %0,0,%2,%6 # set_locked_clean_tail \n" - /* Test whether the lock tail == tail */ -" and %1,%0,%5 \n" +"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n" + /* This test is necessary if there could be stealers */ +" andi. %1,%0,%5 \n" +" bne 3f \n" + /* Test whether the lock tail == mytail */ +" and %1,%0,%6 \n" " cmpw 0,%1,%3 \n" /* Merge the new locked value */ " or %1,%1,%4 \n" " bne 2f \n" /* If the lock tail matched, then clear it, otherwise leave it. */ -" andc %1,%1,%5 \n" +" andc %1,%1,%6 \n" "2: stwcx. 
%1,0,%2 \n" " bne- 1b \n" "\t" PPC_ACQUIRE_BARRIER " \n" "3: \n" : "=&r" (prev), "=&r" (tmp) : "r" (&lock->val), "r"(tail), "r" (newval), + "i" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK), "i" (IS_ENABLED(CONFIG_PPC64)) : "cr0", "memory"); - BUG_ON(prev & _Q_LOCKED_VAL); - return prev; } @@ -117,6 +128,30 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } +static inline bool try_to_steal_lock(struct qspinlock *lock) +{ + int iters = 0; + + if (!steal_spins) + return false; + + /* Attempt to steal the lock */ + do { + u32 val = READ_ONCE(lock->val); + + if (unlikely(!(val & _Q_LOCKED_VAL))) { + if (__queued_spin_trylock_steal(lock)) + return true; + } else { + cpu_relax(); + } + + iters++; + } while (iters < get_steal_spins()); + + return false; +} + static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) { struct qnodes *qnodesp; @@ -166,6 +201,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) smp_rmb(); /* acquire barrier for the mcs lock */ } +again: /* We're at the head of the waitqueue, wait for the lock. */ for (;;) { val = READ_ONCE(lock->val); @@ -176,9 +212,14 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) } /* If we're the last queued, must clean up the tail. */ - old = set_locked_clean_tail(lock, tail); + old = trylock_clean_tail(lock, tail); + if (unlikely(old & _Q_LOCKED_VAL)) { + BUG_ON(!maybe_stealers); + goto again; /* Can only be true if maybe_stealers. */ + } + if ((old & _Q_TAIL_CPU_MASK) == tail) - goto release; /* Another waiter must have enqueued */ + goto release; /* We were the tail, no next. */ /* There is a next, must wait for node->next != NULL (MCS protocol) */ while (!(next = READ_ONCE(node->next))) @@ -199,6 +240,9 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) void queued_spin_lock_slowpath(struct qspinlock *lock) { + if (try_to_steal_lock(lock)) + return; + queued_spin_lock_mcs_queue(lock); } EXPORT_SYMBOL(queued_spin_lock_slowpath); @@ -208,3 +252,51 @@ void pv_spinlocks_init(void) { } #endif + +#include +static int steal_spins_set(void *data, u64 val) +{ + static DEFINE_MUTEX(lock); + + /* + * The lock slow path has a !maybe_stealers case that can assume + * the head of queue will not see concurrent waiters. That waiter + * is unsafe in the presence of stealers, so must keep them away + * from one another. + */ + + mutex_lock(&lock); + if (val && !steal_spins) { + maybe_stealers = true; + /* wait for queue head waiter to go away */ + synchronize_rcu(); + steal_spins = val; + } else if (!val && steal_spins) { + steal_spins = val; + /* wait for all possible stealers to go away */ + synchronize_rcu(); + maybe_stealers = false; + } else { + steal_spins = val; + } + mutex_unlock(&lock); + + return 0; +} + +static int steal_spins_get(void *data, u64 *val) +{ + *val = steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); + +static __init int spinlock_debugfs_init(void) +{ + debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + + return 0; +} +device_initcall(spinlock_debugfs_init); -- GitLab From 0944534ef4d5cf39c8133575524be0be3337dd62 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:20 +1000 Subject: [PATCH 465/694] powerpc/qspinlock: theft prevention to control latency Give the queue head the ability to stop stealers. 
After a number of spins without successfully acquiring the lock, the queue head sets this, which halts stealing and will assure it is the next owner. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-6-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock_types.h | 8 +++- arch/powerpc/lib/qspinlock.c | 53 ++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index b9a5a52fa6703..1911a8a162371 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -29,7 +29,8 @@ typedef struct qspinlock { * Bitfields in the lock word: * * 0: locked bit - * 1-16: unused bits + * 1-15: unused bits + * 16: must queue bit * 17-31: tail cpu (+1) */ #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ @@ -39,6 +40,11 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 1 #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +/* 0x00010000 */ +#define _Q_MUST_Q_OFFSET 16 +#define _Q_MUST_Q_BITS 1 +#define _Q_MUST_Q_VAL (1U << _Q_MUST_Q_OFFSET) + /* 0xfffe0000 */ #define _Q_TAIL_CPU_OFFSET 17 #define _Q_TAIL_CPU_BITS 15 diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 6ffd3261064c2..9cd442d46b9fd 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -22,6 +22,7 @@ struct qnodes { /* Tuning parameters */ static int steal_spins __read_mostly = (1 << 5); static bool maybe_stealers __read_mostly = true; +static int head_spins __read_mostly = (1 << 8); static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -30,6 +31,11 @@ static __always_inline int get_steal_spins(void) return steal_spins; } +static __always_inline int get_head_spins(void) +{ + return head_spins; +} + static inline u32 encode_tail_cpu(int cpu) { return (cpu + 1) << _Q_TAIL_CPU_OFFSET; @@ -104,6 +110,22 @@ static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) return prev; } +static __always_inline u32 set_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # set_mustq \n" +" or %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = decode_tail_cpu(val); @@ -139,6 +161,9 @@ static inline bool try_to_steal_lock(struct qspinlock *lock) do { u32 val = READ_ONCE(lock->val); + if (val & _Q_MUST_Q_VAL) + break; + if (unlikely(!(val & _Q_LOCKED_VAL))) { if (__queued_spin_trylock_steal(lock)) return true; @@ -157,7 +182,9 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) struct qnodes *qnodesp; struct qnode *next, *node; u32 val, old, tail; + bool mustq = false; int idx; + int iters = 0; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -209,6 +236,15 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) break; cpu_relax(); + if (!maybe_stealers) + continue; + iters++; + + if (!mustq && iters >= get_head_spins()) { + mustq = true; + set_mustq(lock); + val |= _Q_MUST_Q_VAL; + } } /* If we're the last queued, must clean up the tail. 
*/ @@ -293,9 +329,26 @@ static int steal_spins_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); +static int head_spins_set(void *data, u64 val) +{ + head_spins = val; + + return 0; +} + +static int head_spins_get(void *data, u64 *val) +{ + *val = head_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); return 0; } -- GitLab From e1a31e7fd7130628cfd229253da2b4630e7a809c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:21 +1000 Subject: [PATCH 466/694] powerpc/qspinlock: store owner CPU in lock word Store the owner CPU number in the lock word so it may be yielded to, as powerpc's paravirtualised simple spinlocks do. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-7-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 12 ++++++++++-- arch/powerpc/include/asm/qspinlock_types.h | 12 +++++++++++- arch/powerpc/lib/qspinlock.c | 2 +- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index cebd2c89c08d4..9572a2ef974da 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -21,8 +21,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock) return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); } +static __always_inline u32 queued_spin_encode_locked_val(void) +{ + /* XXX: make this use lock value in paca like simple spinlocks? 
*/ + return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); +} + static __always_inline int queued_spin_trylock(struct qspinlock *lock) { + u32 new = queued_spin_encode_locked_val(); u32 prev; asm volatile( @@ -34,7 +41,7 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock) "\t" PPC_ACQUIRE_BARRIER " \n" "2: \n" : "=&r" (prev) - : "r" (&lock->val), "r" (_Q_LOCKED_VAL), + : "r" (&lock->val), "r" (new), "i" (IS_ENABLED(CONFIG_PPC64)) : "cr0", "memory"); @@ -43,6 +50,7 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock) static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) { + u32 new = queued_spin_encode_locked_val(); u32 prev, tmp; /* Trylock may get ahead of queued nodes if it finds unlocked */ @@ -57,7 +65,7 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) "\t" PPC_ACQUIRE_BARRIER " \n" "2: \n" : "=&r" (prev), "=&r" (tmp) - : "r" (&lock->val), "r" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK), + : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK), "i" (IS_ENABLED(CONFIG_PPC64)) : "cr0", "memory"); diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index 1911a8a162371..adfeed4aa495b 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -29,7 +29,8 @@ typedef struct qspinlock { * Bitfields in the lock word: * * 0: locked bit - * 1-15: unused bits + * 1-14: lock holder cpu + * 15: unused bit * 16: must queue bit * 17-31: tail cpu (+1) */ @@ -40,6 +41,15 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 1 #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +/* 0x00007ffe */ +#define _Q_OWNER_CPU_OFFSET 1 +#define _Q_OWNER_CPU_BITS 14 +#define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU) + +#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS) +#error "qspinlock does not support such large CONFIG_NR_CPUS" +#endif + /* 0x00010000 */ #define _Q_MUST_Q_OFFSET 16 #define _Q_MUST_Q_BITS 1 diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 9cd442d46b9fd..4d74db0e565f2 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -55,7 +55,7 @@ static inline int decode_tail_cpu(u32 val) */ static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) { - u32 newval = _Q_LOCKED_VAL; + u32 newval = queued_spin_encode_locked_val(); u32 prev, tmp; asm volatile( -- GitLab From 085f03311bcede99550e08a1f7cad41bf758b460 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:22 +1000 Subject: [PATCH 467/694] powerpc/qspinlock: paravirt yield to lock owner Waiters spinning on the lock word should yield to the lock owner if the vCPU is preempted. This improves performance when the hypervisor has oversubscribed physical CPUs. 
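The heart of the heuristic added below can be sketched as follows (illustrative; the real code also inserts an smp_rmb() between sampling the yield count and re-reading the lock word). An odd yield count published by the hypervisor means the owner vCPU is currently preempted and worth directing our cycles to.

  static __always_inline void yield_to_locked_owner_sketch(struct qspinlock *lock, u32 val)
  {
          int owner = get_owner_cpu(val);
          u32 yield_count = yield_count_of(owner);

          /* Odd count: owner vCPU is preempted; if the lock word is unchanged, yield. */
          if ((yield_count & 1) && READ_ONCE(lock->val) == val)
                  yield_to_preempted(owner, yield_count);
          else
                  cpu_relax();
  }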
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-8-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 99 +++++++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 4d74db0e565f2..18e21574e6c59 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -5,6 +5,7 @@ #include #include #include +#include #define MAX_NODES 4 @@ -24,14 +25,16 @@ static int steal_spins __read_mostly = (1 << 5); static bool maybe_stealers __read_mostly = true; static int head_spins __read_mostly = (1 << 8); +static bool pv_yield_owner __read_mostly = true; + static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); -static __always_inline int get_steal_spins(void) +static __always_inline int get_steal_spins(bool paravirt) { return steal_spins; } -static __always_inline int get_head_spins(void) +static __always_inline int get_head_spins(bool paravirt) { return head_spins; } @@ -46,6 +49,11 @@ static inline int decode_tail_cpu(u32 val) return (val >> _Q_TAIL_CPU_OFFSET) - 1; } +static inline int get_owner_cpu(u32 val) +{ + return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET; +} + /* * Try to acquire the lock if it was not already locked. If the tail matches * mytail then clear it, otherwise leave it unchnaged. Return previous value. @@ -150,7 +158,45 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } -static inline bool try_to_steal_lock(struct qspinlock *lock) +static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + int owner; + u32 yield_count; + + BUG_ON(!(val & _Q_LOCKED_VAL)); + + if (!paravirt) + goto relax; + + if (!pv_yield_owner) + goto relax; + + owner = get_owner_cpu(val); + yield_count = yield_count_of(owner); + + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + /* + * Read the lock word after sampling the yield count. On the other side + * there may a wmb because the yield count update is done by the + * hypervisor preemption and the value update by the OS, however this + * ordering might reduce the chance of out of order accesses and + * improve the heuristic. + */ + smp_rmb(); + + if (READ_ONCE(lock->val) == val) { + yield_to_preempted(owner, yield_count); + /* Don't relax if we yielded. Maybe we should? 
*/ + return; + } +relax: + cpu_relax(); +} + + +static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { int iters = 0; @@ -168,16 +214,16 @@ static inline bool try_to_steal_lock(struct qspinlock *lock) if (__queued_spin_trylock_steal(lock)) return true; } else { - cpu_relax(); + yield_to_locked_owner(lock, val, paravirt); } iters++; - } while (iters < get_steal_spins()); + } while (iters < get_steal_spins(paravirt)); return false; } -static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) +static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt) { struct qnodes *qnodesp; struct qnode *next, *node; @@ -235,12 +281,12 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) if (!(val & _Q_LOCKED_VAL)) break; - cpu_relax(); + yield_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; iters++; - if (!mustq && iters >= get_head_spins()) { + if (!mustq && iters >= get_head_spins(paravirt)) { mustq = true; set_mustq(lock); val |= _Q_MUST_Q_VAL; @@ -276,10 +322,20 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) void queued_spin_lock_slowpath(struct qspinlock *lock) { - if (try_to_steal_lock(lock)) - return; - - queued_spin_lock_mcs_queue(lock); + /* + * This looks funny, but it induces the compiler to inline both + * sides of the branch rather than share code as when the condition + * is passed as the paravirt argument to the functions. + */ + if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { + if (try_to_steal_lock(lock, true)) + return; + queued_spin_lock_mcs_queue(lock, true); + } else { + if (try_to_steal_lock(lock, false)) + return; + queued_spin_lock_mcs_queue(lock, false); + } } EXPORT_SYMBOL(queued_spin_lock_slowpath); @@ -345,10 +401,29 @@ static int head_spins_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); +static int pv_yield_owner_set(void *data, u64 val) +{ + pv_yield_owner = !!val; + + return 0; +} + +static int pv_yield_owner_get(void *data, u64 *val) +{ + *val = pv_yield_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); + if (is_shared_processor()) { + debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + } return 0; } -- GitLab From bd48287b2cf4cd6e95576db3a94fd2a7cdf9832d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:23 +1000 Subject: [PATCH 468/694] powerpc/qspinlock: implement option to yield to previous node Queued waiters which are not at the head of the queue don't spin on the lock word but their qnode lock word, waiting for the previous queued CPU to release them. Add an option which allows these waiters to yield to the previous CPU if its vCPU is preempted. 
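The same preemption test, applied to the previous queue element instead of the lock owner, can be modelled in a few stand-alone lines. The struct and the _model names are illustrative; yield_count_of() and yield_to_preempted() are the paravirt helpers used by the patch, which also orders the yield-count sample against the re-read of node->locked with smp_rmb().

    struct qnode_model {
        struct qnode_model *next;
        int locked;             /* set to 1 by the previous waiter on hand-over */
    };

    /* Stubbed paravirt hooks, as in the earlier sketch. */
    static unsigned int yield_count_of(int cpu) { (void)cpu; return 0; }
    static void yield_to_preempted(int cpu, unsigned int yc) { (void)cpu; (void)yc; }
    static void cpu_relax_model(void) { }

    /* One iteration of waiting for the previous queued CPU to release us. */
    static void yield_to_prev_model(struct qnode_model *node, int prev_cpu)
    {
        unsigned int yc = yield_count_of(prev_cpu);

        if ((yc & 1) == 0) {            /* previous vCPU is running */
            cpu_relax_model();
            return;
        }
        /* Only yield if we are still blocked; the previous CPU may have
         * handed the MCS lock over between the two samples. */
        if (!node->locked)
            yield_to_preempted(prev_cpu, yc);
        else
            cpu_relax_model();
    }

Like the owner yield, the behaviour is controlled by a tunable (pv_yield_prev) that the patch exposes as qspl_pv_yield_prev in the powerpc debugfs directory.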
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-9-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 18e21574e6c59..41afd8e689186 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -26,6 +26,7 @@ static bool maybe_stealers __read_mostly = true; static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; +static bool pv_yield_prev __read_mostly = true; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -195,6 +196,32 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va cpu_relax(); } +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) +{ + int prev_cpu = decode_tail_cpu(val); + u32 yield_count; + + if (!paravirt) + goto relax; + + if (!pv_yield_prev) + goto relax; + + yield_count = yield_count_of(prev_cpu); + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + smp_rmb(); /* See yield_to_locked_owner comment */ + + if (!node->locked) { + yield_to_preempted(prev_cpu, yield_count); + return; + } + +relax: + cpu_relax(); +} + static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { @@ -269,7 +296,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b /* Wait for mcs node lock to be released */ while (!node->locked) - cpu_relax(); + yield_to_prev(lock, node, old, paravirt); smp_rmb(); /* acquire barrier for the mcs lock */ } @@ -417,12 +444,29 @@ static int pv_yield_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); +static int pv_yield_prev_set(void *data, u64 val) +{ + pv_yield_prev = !!val; + + return 0; +} + +static int pv_yield_prev_get(void *data, u64 *val) +{ + *val = pv_yield_prev; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); } return 0; -- GitLab From b4c3cdc1a698a2f6168768d0bed4bf062723722e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:24 +1000 Subject: [PATCH 469/694] powerpc/qspinlock: allow stealing when head of queue yields If the head of queue is preventing stealing but it finds the owner vCPU is preempted, it will yield its cycles to the owner which could cause it to become preempted. Add an option to re-allow stealers before yielding, and disallow them again after returning from the yield. Disable this option by default for now, i.e., no logical change. 
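The mechanism amounts to dropping the must-queue bit for the duration of the yield. A minimal stand-alone sketch of that window is shown below; the set/clear helpers here are plain bit operations for illustration, whereas the kernel versions are lwarx/stwcx. loops on the lock word, and pv_yield_allow_steal is the new tunable (off by default).

    #include <stdbool.h>
    #include <stdint.h>

    #define _Q_MUST_Q_VAL 0x00010000u

    static bool pv_yield_allow_steal;   /* false by default: no behaviour change */

    static void yield_to_preempted(int cpu, unsigned int yc) { (void)cpu; (void)yc; }
    static void set_mustq_model(uint32_t *lock)   { *lock |=  _Q_MUST_Q_VAL; }
    static void clear_mustq_model(uint32_t *lock) { *lock &= ~_Q_MUST_Q_VAL; }

    /* Head-of-queue yield: re-open the lock to stealers while we are not
     * spinning anyway, then close it again once we resume. */
    static void yield_head_model(uint32_t *lock, uint32_t val, int owner,
                                 unsigned int yield_count)
    {
        bool mustq = (val & _Q_MUST_Q_VAL) && pv_yield_allow_steal;

        if (mustq)
            clear_mustq_model(lock);
        yield_to_preempted(owner, yield_count);
        if (mustq)
            set_mustq_model(lock);
    }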
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-10-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 59 ++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 41afd8e689186..c1f3b699b63f7 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -26,6 +26,7 @@ static bool maybe_stealers __read_mostly = true; static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; +static bool pv_yield_allow_steal __read_mostly = false; static bool pv_yield_prev __read_mostly = true; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -135,6 +136,22 @@ static __always_inline u32 set_mustq(struct qspinlock *lock) return prev; } +static __always_inline u32 clear_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # clear_mustq \n" +" andc %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = decode_tail_cpu(val); @@ -159,7 +176,7 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } -static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) { int owner; u32 yield_count; @@ -188,7 +205,11 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va smp_rmb(); if (READ_ONCE(lock->val) == val) { + if (mustq) + clear_mustq(lock); yield_to_preempted(owner, yield_count); + if (mustq) + set_mustq(lock); /* Don't relax if we yielded. Maybe we should? 
*/ return; } @@ -196,6 +217,21 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va cpu_relax(); } +static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + __yield_to_locked_owner(lock, val, paravirt, false); +} + +static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + bool mustq = false; + + if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) + mustq = true; + + __yield_to_locked_owner(lock, val, paravirt, mustq); +} + static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) { int prev_cpu = decode_tail_cpu(val); @@ -211,7 +247,7 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * if ((yield_count & 1) == 0) goto relax; /* owner vcpu is running */ - smp_rmb(); /* See yield_to_locked_owner comment */ + smp_rmb(); /* See __yield_to_locked_owner comment */ if (!node->locked) { yield_to_preempted(prev_cpu, yield_count); @@ -308,7 +344,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b if (!(val & _Q_LOCKED_VAL)) break; - yield_to_locked_owner(lock, val, paravirt); + yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; iters++; @@ -444,6 +480,22 @@ static int pv_yield_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); +static int pv_yield_allow_steal_set(void *data, u64 val) +{ + pv_yield_allow_steal = !!val; + + return 0; +} + +static int pv_yield_allow_steal_get(void *data, u64 *val) +{ + *val = pv_yield_allow_steal; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); + static int pv_yield_prev_set(void *data, u64 val) { pv_yield_prev = !!val; @@ -466,6 +518,7 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); } -- GitLab From 28db61e207ea3890d286cff3141c1ce67346074d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:25 +1000 Subject: [PATCH 470/694] powerpc/qspinlock: allow propagation of yield CPU down the queue Having all CPUs poll the lock word for the owner CPU that should be yielded to defeats most of the purpose of using MCS queueing for scalability. Yet it may be desirable for queued waiters to yield to a preempted owner. With this change, queue waiters never sample the owner CPU directly from the lock word. The queue head (which is spinning on the lock) propagates the owner CPU back to the next waiter if it finds the owner has been preempted. That waiter then propagates the owner CPU back to the next waiter, and so on. s390 addresses this problem differenty, by having queued waiters sample the lock word to find the owner at a low frequency. That has the advantage of being simpler, the advantage of propagation is that the lock word never has to be accesed by queued waiters, and the transfer of cache lines to transmit the owner data is only required when lock holder vCPU preemption occurs. 
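The propagation is a relay along the MCS queue: the head writes the preempted owner's CPU number into the next node, and each later waiter forwards whatever it received before acting on it. A simplified stand-alone model follows; struct qnode_model and the _model names are illustrative, while vcpu_is_preempted() and the yield_cpu field (with -1 meaning nothing to propagate) follow the patch.

    #include <stdbool.h>

    struct qnode_model {
        struct qnode_model *next;
        int yield_cpu;          /* -1: no preempted owner to pass on */
        int locked;
    };

    static bool vcpu_is_preempted(int cpu) { (void)cpu; return false; } /* stub */

    /* Queue head: push the owner CPU back to the next waiter.  *set_yield_cpu
     * caches the last value written so the next node's cache line is not
     * dirtied on every spin iteration. */
    static void propagate_yield_cpu_model(struct qnode_model *node, int owner,
                                          int *set_yield_cpu)
    {
        struct qnode_model *next = node->next;

        if (*set_yield_cpu == owner || !next)
            return;
        if (vcpu_is_preempted(owner) || *set_yield_cpu != -1) {
            next->yield_cpu = owner;
            *set_yield_cpu = owner;
        }
    }

    /* Non-head waiter: forward the value down the queue, then (in the real
     * code) yield to yield_cpu if its yield count shows it preempted. */
    static void forward_yield_cpu_model(struct qnode_model *node)
    {
        int yield_cpu = node->yield_cpu;

        if (node->next && node->next->yield_cpu != yield_cpu)
            node->next->yield_cpu = yield_cpu;
    }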
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-11-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 79 ++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index c1f3b699b63f7..c45f30c9a19e8 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -12,6 +12,7 @@ struct qnode { struct qnode *next; struct qspinlock *lock; + int yield_cpu; u8 locked; /* 1 if lock acquired */ }; @@ -28,6 +29,7 @@ static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; static bool pv_yield_prev __read_mostly = true; +static bool pv_yield_propagate_owner __read_mostly = true; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -232,14 +234,67 @@ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u __yield_to_locked_owner(lock, val, paravirt, mustq); } +static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt) +{ + struct qnode *next; + int owner; + + if (!paravirt) + return; + if (!pv_yield_propagate_owner) + return; + + owner = get_owner_cpu(val); + if (*set_yield_cpu == owner) + return; + + next = READ_ONCE(node->next); + if (!next) + return; + + if (vcpu_is_preempted(owner)) { + next->yield_cpu = owner; + *set_yield_cpu = owner; + } else if (*set_yield_cpu != -1) { + next->yield_cpu = owner; + *set_yield_cpu = owner; + } +} + static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) { int prev_cpu = decode_tail_cpu(val); u32 yield_count; + int yield_cpu; if (!paravirt) goto relax; + if (!pv_yield_propagate_owner) + goto yield_prev; + + yield_cpu = READ_ONCE(node->yield_cpu); + if (yield_cpu == -1) { + /* Propagate back the -1 CPU */ + if (node->next && node->next->yield_cpu != -1) + node->next->yield_cpu = yield_cpu; + goto yield_prev; + } + + yield_count = yield_count_of(yield_cpu); + if ((yield_count & 1) == 0) + goto yield_prev; /* owner vcpu is running */ + + smp_rmb(); + + if (yield_cpu == node->yield_cpu) { + if (node->next && node->next->yield_cpu != yield_cpu) + node->next->yield_cpu = yield_cpu; + yield_to_preempted(yield_cpu, yield_count); + return; + } + +yield_prev: if (!pv_yield_prev) goto relax; @@ -293,6 +348,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b u32 val, old, tail; bool mustq = false; int idx; + int set_yield_cpu = -1; int iters = 0; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -314,6 +370,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b node = &qnodesp->nodes[idx]; node->next = NULL; node->lock = lock; + node->yield_cpu = -1; node->locked = 0; tail = encode_tail_cpu(smp_processor_id()); @@ -334,6 +391,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b while (!node->locked) yield_to_prev(lock, node, old, paravirt); + /* Clear out stale propagated yield_cpu */ + if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1) + node->yield_cpu = -1; + smp_rmb(); /* acquire barrier for the mcs lock */ } @@ -344,6 +405,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b if (!(val & _Q_LOCKED_VAL)) break; + propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); yield_head_to_locked_owner(lock, val, paravirt); 
if (!maybe_stealers) continue; @@ -512,6 +574,22 @@ static int pv_yield_prev_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); +static int pv_yield_propagate_owner_set(void *data, u64 val) +{ + pv_yield_propagate_owner = !!val; + + return 0; +} + +static int pv_yield_propagate_owner_get(void *data, u64 *val) +{ + *val = pv_yield_propagate_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); @@ -520,6 +598,7 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); + debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); } return 0; -- GitLab From be742c573fdafcfa1752642ca1c7aaf08c258128 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:26 +1000 Subject: [PATCH 471/694] powerpc/qspinlock: add ability to prod new queue head CPU After the head of the queue acquires the lock, it releases the next waiter in the queue to become the new head. Add an option to prod the new head if its vCPU was preempted. This may only have an effect if queue waiters are yielding. Disable this option by default for now, i.e., no logical change. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-12-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index c45f30c9a19e8..2f6c0bed25ead 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -12,6 +12,7 @@ struct qnode { struct qnode *next; struct qspinlock *lock; + int cpu; int yield_cpu; u8 locked; /* 1 if lock acquired */ }; @@ -30,6 +31,7 @@ static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; static bool pv_yield_prev __read_mostly = true; static bool pv_yield_propagate_owner __read_mostly = true; +static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); @@ -370,10 +372,11 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b node = &qnodesp->nodes[idx]; node->next = NULL; node->lock = lock; + node->cpu = smp_processor_id(); node->yield_cpu = -1; node->locked = 0; - tail = encode_tail_cpu(smp_processor_id()); + tail = encode_tail_cpu(node->cpu); old = publish_tail_cpu(lock, tail); @@ -439,7 +442,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b * this store to locked. The corresponding barrier is the smp_rmb() * acquire barrier for mcs lock, above. 
*/ - WRITE_ONCE(next->locked, 1); + if (paravirt && pv_prod_head) { + int next_cpu = next->cpu; + WRITE_ONCE(next->locked, 1); + if (vcpu_is_preempted(next_cpu)) + prod_cpu(next_cpu); + } else { + WRITE_ONCE(next->locked, 1); + } release: qnodesp->count--; /* release the node */ @@ -590,6 +600,22 @@ static int pv_yield_propagate_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n"); +static int pv_prod_head_set(void *data, u64 val) +{ + pv_prod_head = !!val; + + return 0; +} + +static int pv_prod_head_get(void *data, u64 *val) +{ + *val = pv_prod_head; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n"); + static __init int spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); @@ -599,6 +625,7 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); + debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); } return 0; -- GitLab From f61ab43cc1a6146d6eef7e0713a452c3677ad13e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:27 +1000 Subject: [PATCH 472/694] powerpc/qspinlock: allow lock stealing in trylock and lock fastpath This change allows trylock to steal the lock. It also allows the initial lock attempt to steal the lock rather than bailing out and going to the slow path. This gives trylock more strength: without this a continually-contended lock will never permit a trylock to succeed. With this change, the trylock has a small but non-zero chance. It also gives the lock fastpath most of the benefit of passing the reservation back through to the steal loop in the slow path without the complexity. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-13-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 22 ++++++++++++++++++++-- arch/powerpc/lib/qspinlock.c | 9 +++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 9572a2ef974da..93b1c976db8a5 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -6,6 +6,15 @@ #include #include +/* + * The trylock itself may steal. This makes trylocks slightly stronger, and + * might make spin locks slightly more efficient when stealing. + * + * This is compile-time, so if true then there may always be stealers, so the + * nosteal paths become unused. 
+ */ +#define _Q_SPIN_TRY_LOCK_STEAL 1 + static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { return READ_ONCE(lock->val); @@ -27,13 +36,14 @@ static __always_inline u32 queued_spin_encode_locked_val(void) return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); } -static __always_inline int queued_spin_trylock(struct qspinlock *lock) +static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock) { u32 new = queued_spin_encode_locked_val(); u32 prev; + /* Trylock succeeds only when unlocked and no queued nodes */ asm volatile( -"1: lwarx %0,0,%1,%3 # queued_spin_trylock \n" +"1: lwarx %0,0,%1,%3 # __queued_spin_trylock_nosteal \n" " cmpwi 0,%0,0 \n" " bne- 2f \n" " stwcx. %2,0,%1 \n" @@ -72,6 +82,14 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) return likely(!(prev & ~_Q_TAIL_CPU_MASK)); } +static __always_inline int queued_spin_trylock(struct qspinlock *lock) +{ + if (!_Q_SPIN_TRY_LOCK_STEAL) + return __queued_spin_trylock_nosteal(lock); + else + return __queued_spin_trylock_steal(lock); +} + void queued_spin_lock_slowpath(struct qspinlock *lock); static __always_inline void queued_spin_lock(struct qspinlock *lock) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 2f6c0bed25ead..8e5b8bc3f0947 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -24,7 +24,11 @@ struct qnodes { /* Tuning parameters */ static int steal_spins __read_mostly = (1 << 5); +#if _Q_SPIN_TRY_LOCK_STEAL == 1 +static const bool maybe_stealers = true; +#else static bool maybe_stealers __read_mostly = true; +#endif static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; @@ -483,6 +487,10 @@ void pv_spinlocks_init(void) #include static int steal_spins_set(void *data, u64 val) { +#if _Q_SPIN_TRY_LOCK_STEAL == 1 + /* MAYBE_STEAL remains true */ + steal_spins = val; +#else static DEFINE_MUTEX(lock); /* @@ -507,6 +515,7 @@ static int steal_spins_set(void *data, u64 val) steal_spins = val; } mutex_unlock(&lock); +#endif return 0; } -- GitLab From 71c235027ce7940434acd3f553602ad8b5d36469 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:28 +1000 Subject: [PATCH 473/694] powerpc/qspinlock: use spin_begin/end API Use the spin_begin/spin_cpu_relax/spin_end APIs in qspinlock, which helps to prevent threads issuing a lot of expensive priority nops which may not have much effect due to immediately executing low then medium priority. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-14-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 39 ++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 8e5b8bc3f0947..36aff7defda84 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -184,6 +184,7 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } +/* Called inside spin_begin() */ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) { int owner; @@ -203,6 +204,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 if ((yield_count & 1) == 0) goto relax; /* owner vcpu is running */ + spin_end(); + /* * Read the lock word after sampling the yield count. 
On the other side * there may a wmb because the yield count update is done by the @@ -218,18 +221,22 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 yield_to_preempted(owner, yield_count); if (mustq) set_mustq(lock); + spin_begin(); /* Don't relax if we yielded. Maybe we should? */ return; } + spin_begin(); relax: - cpu_relax(); + spin_cpu_relax(); } +/* Called inside spin_begin() */ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) { __yield_to_locked_owner(lock, val, paravirt, false); } +/* Called inside spin_begin() */ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) { bool mustq = false; @@ -267,6 +274,7 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int } } +/* Called inside spin_begin() */ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) { int prev_cpu = decode_tail_cpu(val); @@ -291,14 +299,18 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * if ((yield_count & 1) == 0) goto yield_prev; /* owner vcpu is running */ + spin_end(); + smp_rmb(); if (yield_cpu == node->yield_cpu) { if (node->next && node->next->yield_cpu != yield_cpu) node->next->yield_cpu = yield_cpu; yield_to_preempted(yield_cpu, yield_count); + spin_begin(); return; } + spin_begin(); yield_prev: if (!pv_yield_prev) @@ -308,15 +320,19 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * if ((yield_count & 1) == 0) goto relax; /* owner vcpu is running */ + spin_end(); + smp_rmb(); /* See __yield_to_locked_owner comment */ if (!node->locked) { yield_to_preempted(prev_cpu, yield_count); + spin_begin(); return; } + spin_begin(); relax: - cpu_relax(); + spin_cpu_relax(); } @@ -328,6 +344,8 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav return false; /* Attempt to steal the lock */ + spin_begin(); + do { u32 val = READ_ONCE(lock->val); @@ -335,8 +353,10 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav break; if (unlikely(!(val & _Q_LOCKED_VAL))) { + spin_end(); if (__queued_spin_trylock_steal(lock)) return true; + spin_begin(); } else { yield_to_locked_owner(lock, val, paravirt); } @@ -344,6 +364,8 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav iters++; } while (iters < get_steal_spins(paravirt)); + spin_end(); + return false; } @@ -395,8 +417,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b WRITE_ONCE(prev->next, node); /* Wait for mcs node lock to be released */ + spin_begin(); while (!node->locked) yield_to_prev(lock, node, old, paravirt); + spin_end(); /* Clear out stale propagated yield_cpu */ if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1) @@ -407,6 +431,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b again: /* We're at the head of the waitqueue, wait for the lock. */ + spin_begin(); for (;;) { val = READ_ONCE(lock->val); if (!(val & _Q_LOCKED_VAL)) @@ -424,6 +449,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b val |= _Q_MUST_Q_VAL; } } + spin_end(); /* If we're the last queued, must clean up the tail. */ old = trylock_clean_tail(lock, tail); @@ -436,8 +462,13 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b goto release; /* We were the tail, no next. 
*/ /* There is a next, must wait for node->next != NULL (MCS protocol) */ - while (!(next = READ_ONCE(node->next))) - cpu_relax(); + next = READ_ONCE(node->next); + if (!next) { + spin_begin(); + while (!(next = READ_ONCE(node->next))) + cpu_relax(); + spin_end(); + } /* * Unlock the next mcs waiter node. Release barrier is not required -- GitLab From cc79701114154efe79663ba47d9e51aad2ed3c78 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:29 +1000 Subject: [PATCH 474/694] powerpc/qspinlock: reduce remote node steal spins Allow for a reduction in the number of times a CPU from a different node than the owner can attempt to steal the lock before queueing. This could bias the transfer behaviour of the lock across the machine and reduce NUMA crossings. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-15-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 43 +++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 36aff7defda84..8c6b5ef871184 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,7 @@ struct qnodes { /* Tuning parameters */ static int steal_spins __read_mostly = (1 << 5); +static int remote_steal_spins __read_mostly = (1 << 2); #if _Q_SPIN_TRY_LOCK_STEAL == 1 static const bool maybe_stealers = true; #else @@ -44,6 +46,11 @@ static __always_inline int get_steal_spins(bool paravirt) return steal_spins; } +static __always_inline int get_remote_steal_spins(bool paravirt) +{ + return remote_steal_spins; +} + static __always_inline int get_head_spins(bool paravirt) { return head_spins; @@ -335,10 +342,24 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * spin_cpu_relax(); } +static __always_inline bool steal_break(u32 val, int iters, bool paravirt) +{ + if (iters >= get_steal_spins(paravirt)) + return true; + + if (IS_ENABLED(CONFIG_NUMA) && + (iters >= get_remote_steal_spins(paravirt))) { + int cpu = get_owner_cpu(val); + if (numa_node_id() != cpu_to_node(cpu)) + return true; + } + return false; +} static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { int iters = 0; + u32 val; if (!steal_spins) return false; @@ -347,8 +368,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav spin_begin(); do { - u32 val = READ_ONCE(lock->val); - + val = READ_ONCE(lock->val); if (val & _Q_MUST_Q_VAL) break; @@ -362,7 +382,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav } iters++; - } while (iters < get_steal_spins(paravirt)); + } while (!steal_break(val, iters, paravirt)); spin_end(); @@ -560,6 +580,22 @@ static int steal_spins_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); +static int remote_steal_spins_set(void *data, u64 val) +{ + remote_steal_spins = val; + + return 0; +} + +static int remote_steal_spins_get(void *data, u64 *val) +{ + *val = remote_steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n"); + static int head_spins_set(void *data, u64 val) { head_spins = val; @@ -659,6 +695,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, " static __init int 
spinlock_debugfs_init(void) { debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins); debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); -- GitLab From 39dfc73596b48bb50cf7e4f3f54e38427dda5b4e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:30 +1000 Subject: [PATCH 475/694] powerpc/qspinlock: allow indefinite spinning on a preempted owner Provide an option that holds off queueing indefinitely while the lock owner is preempted. This could reduce queueing latencies for very overcommitted vcpu situations. This is disabled by default. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-16-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 77 +++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 8c6b5ef871184..eeaaecfd5b774 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -35,6 +35,7 @@ static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; +static bool pv_spin_on_preempted_owner __read_mostly = false; static bool pv_yield_prev __read_mostly = true; static bool pv_yield_propagate_owner __read_mostly = true; static bool pv_prod_head __read_mostly = false; @@ -191,11 +192,12 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) BUG(); } -/* Called inside spin_begin() */ -static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) { int owner; u32 yield_count; + bool preempted = false; BUG_ON(!(val & _Q_LOCKED_VAL)); @@ -213,6 +215,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 spin_end(); + preempted = true; + /* * Read the lock word after sampling the yield count. On the other side * there may a wmb because the yield count update is done by the @@ -229,29 +233,32 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 if (mustq) set_mustq(lock); spin_begin(); + /* Don't relax if we yielded. Maybe we should? */ - return; + return preempted; } spin_begin(); relax: spin_cpu_relax(); + + return preempted; } -/* Called inside spin_begin() */ -static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) { - __yield_to_locked_owner(lock, val, paravirt, false); + return __yield_to_locked_owner(lock, val, paravirt, false); } -/* Called inside spin_begin() */ -static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. 
*/ +static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) { bool mustq = false; if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) mustq = true; - __yield_to_locked_owner(lock, val, paravirt, mustq); + return __yield_to_locked_owner(lock, val, paravirt, mustq); } static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt) @@ -361,13 +368,16 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav int iters = 0; u32 val; - if (!steal_spins) + if (!steal_spins) { + /* XXX: should spin_on_preempted_owner do anything here? */ return false; + } /* Attempt to steal the lock */ spin_begin(); - do { + bool preempted = false; + val = READ_ONCE(lock->val); if (val & _Q_MUST_Q_VAL) break; @@ -378,10 +388,23 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav return true; spin_begin(); } else { - yield_to_locked_owner(lock, val, paravirt); + preempted = yield_to_locked_owner(lock, val, paravirt); } - iters++; + if (preempted) { + if (!pv_spin_on_preempted_owner) + iters++; + /* + * pv_spin_on_preempted_owner don't increase iters + * while the owner is preempted -- we won't interfere + * with it by definition. This could introduce some + * latency issue if we continually observe preempted + * owners, but hopefully that's a rare corner case of + * a badly oversubscribed system. + */ + } else { + iters++; + } } while (!steal_break(val, iters, paravirt)); spin_end(); @@ -453,15 +476,22 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b /* We're at the head of the waitqueue, wait for the lock. */ spin_begin(); for (;;) { + bool preempted; + val = READ_ONCE(lock->val); if (!(val & _Q_LOCKED_VAL)) break; propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); - yield_head_to_locked_owner(lock, val, paravirt); + preempted = yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; - iters++; + if (preempted) { + if (!pv_spin_on_preempted_owner) + iters++; + } else { + iters++; + } if (!mustq && iters >= get_head_spins(paravirt)) { mustq = true; @@ -644,6 +674,22 @@ static int pv_yield_allow_steal_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); +static int pv_spin_on_preempted_owner_set(void *data, u64 val) +{ + pv_spin_on_preempted_owner = !!val; + + return 0; +} + +static int pv_spin_on_preempted_owner_get(void *data, u64 *val) +{ + *val = pv_spin_on_preempted_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n"); + static int pv_yield_prev_set(void *data, u64 val) { pv_yield_prev = !!val; @@ -700,6 +746,7 @@ static __init int spinlock_debugfs_init(void) if (is_shared_processor()) { debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); + debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); -- 
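The effect of this option is easiest to see in the loop accounting: the spin budget is only consumed while the owner is observed running, so with pv_spin_on_preempted_owner enabled a waiter can stay in the steal phase for as long as the owner remains preempted. A reduced stand-alone model of that accounting follows; spin_or_yield_once() is a placeholder for one spin or yield attempt and the _model name is illustrative, while the tunable and the increment rule mirror the patch.

    #include <stdbool.h>

    static bool pv_spin_on_preempted_owner; /* false by default, as in the patch */
    static int steal_spins = 1 << 5;

    /* Placeholder for one spin/yield attempt against a held lock; returns
     * true if the owner's vCPU was observed preempted in this iteration. */
    static bool spin_or_yield_once(void) { return false; }

    /* Returns once the steal budget is exhausted and the CPU should queue. */
    static void steal_phase_model(void)
    {
        int iters = 0;

        do {
            bool preempted = spin_or_yield_once();

            /* A preempted owner cannot make progress, so spinning on it
             * does not interfere with it; only charge the budget when the
             * owner was running (or when the option is left disabled). */
            if (!preempted || !pv_spin_on_preempted_owner)
                iters++;
        } while (iters < steal_spins);
    }

At run time the switch appears, together with the other qspl_* knobs, as qspl_pv_spin_on_preempted_owner in the powerpc debugfs directory on shared-processor systems.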
GitLab From 12b459a5ebf3308e718bc1dd48acb7c4cf7f1a75 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:31 +1000 Subject: [PATCH 476/694] powerpc/qspinlock: provide accounting and options for sleepy locks Finding the owner or a queued waiter on a lock with a preempted vcpu is indicative of an oversubscribed guest causing the lock to get into trouble. Provide some options to detect this situation and have new CPUs avoid queueing for a longer time (more steal iterations) to minimise the problems caused by vcpu preemption on the queue. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-17-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock_types.h | 7 +- arch/powerpc/lib/qspinlock.c | 242 +++++++++++++++++++-- 2 files changed, 230 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h index adfeed4aa495b..4766a7aa03cbc 100644 --- a/arch/powerpc/include/asm/qspinlock_types.h +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -30,7 +30,7 @@ typedef struct qspinlock { * * 0: locked bit * 1-14: lock holder cpu - * 15: unused bit + * 15: lock owner or queuer vcpus observed to be preempted bit * 16: must queue bit * 17-31: tail cpu (+1) */ @@ -50,6 +50,11 @@ typedef struct qspinlock { #error "qspinlock does not support such large CONFIG_NR_CPUS" #endif +/* 0x00008000 */ +#define _Q_SLEEPY_OFFSET 15 +#define _Q_SLEEPY_BITS 1 +#define _Q_SLEEPY_VAL (1U << _Q_SLEEPY_OFFSET) + /* 0x00010000 */ #define _Q_MUST_Q_OFFSET 16 #define _Q_MUST_Q_BITS 1 diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index eeaaecfd5b774..0f33a07c1d192 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -36,25 +37,56 @@ static int head_spins __read_mostly = (1 << 8); static bool pv_yield_owner __read_mostly = true; static bool pv_yield_allow_steal __read_mostly = false; static bool pv_spin_on_preempted_owner __read_mostly = false; +static bool pv_sleepy_lock __read_mostly = true; +static bool pv_sleepy_lock_sticky __read_mostly = false; +static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; +static int pv_sleepy_lock_factor __read_mostly = 256; static bool pv_yield_prev __read_mostly = true; static bool pv_yield_propagate_owner __read_mostly = true; static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); -static __always_inline int get_steal_spins(bool paravirt) +static __always_inline bool recently_sleepy(void) { - return steal_spins; + /* pv_sleepy_lock is true when this is called */ + if (pv_sleepy_lock_interval_ns) { + u64 seen = this_cpu_read(sleepy_lock_seen_clock); + + if (seen) { + u64 delta = sched_clock() - seen; + if (delta < pv_sleepy_lock_interval_ns) + return true; + this_cpu_write(sleepy_lock_seen_clock, 0); + } + } + + return false; } -static __always_inline int get_remote_steal_spins(bool paravirt) +static __always_inline int get_steal_spins(bool paravirt, bool sleepy) { - return remote_steal_spins; + if (paravirt && sleepy) + return steal_spins * pv_sleepy_lock_factor; + else + return steal_spins; } -static __always_inline int get_head_spins(bool paravirt) +static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy) { - return head_spins; + if (paravirt && sleepy) + return remote_steal_spins 
* pv_sleepy_lock_factor; + else + return remote_steal_spins; +} + +static __always_inline int get_head_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return head_spins * pv_sleepy_lock_factor; + else + return head_spins; } static inline u32 encode_tail_cpu(int cpu) @@ -168,6 +200,56 @@ static __always_inline u32 clear_mustq(struct qspinlock *lock) return prev; } +static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old) +{ + u32 prev; + u32 new = old | _Q_SLEEPY_VAL; + + BUG_ON(!(old & _Q_LOCKED_VAL)); + BUG_ON(old & _Q_SLEEPY_VAL); + + asm volatile( +"1: lwarx %0,0,%1 # try_set_sleepy \n" +" cmpw 0,%0,%2 \n" +" bne- 2f \n" +" stwcx. %3,0,%1 \n" +" bne- 1b \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r"(old), "r" (new) + : "cr0", "memory"); + + return likely(prev == old); +} + +static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } +} + +static __always_inline void seen_sleepy_lock(void) +{ + if (pv_sleepy_lock && pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); +} + +static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (val & _Q_LOCKED_VAL) { + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } + } +} + static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) { int cpu = decode_tail_cpu(val); @@ -215,6 +297,7 @@ static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 spin_end(); + seen_sleepy_owner(lock, val); preempted = true; /* @@ -289,11 +372,12 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int } /* Called inside spin_begin() */ -static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) { int prev_cpu = decode_tail_cpu(val); u32 yield_count; int yield_cpu; + bool preempted = false; if (!paravirt) goto relax; @@ -315,6 +399,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * spin_end(); + preempted = true; + seen_sleepy_node(lock, val); + smp_rmb(); if (yield_cpu == node->yield_cpu) { @@ -322,7 +409,7 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * node->next->yield_cpu = yield_cpu; yield_to_preempted(yield_cpu, yield_count); spin_begin(); - return; + return preempted; } spin_begin(); @@ -336,26 +423,31 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode * spin_end(); + preempted = true; + seen_sleepy_node(lock, val); + smp_rmb(); /* See __yield_to_locked_owner comment */ if (!node->locked) { yield_to_preempted(prev_cpu, yield_count); spin_begin(); - return; + return preempted; } spin_begin(); relax: spin_cpu_relax(); + + return preempted; } -static __always_inline bool steal_break(u32 val, int iters, bool paravirt) +static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy) { - if (iters >= get_steal_spins(paravirt)) + if (iters >= get_steal_spins(paravirt, sleepy)) return true; if (IS_ENABLED(CONFIG_NUMA) && - (iters >= get_remote_steal_spins(paravirt))) { + (iters >= 
get_remote_steal_spins(paravirt, sleepy))) { int cpu = get_owner_cpu(val); if (numa_node_id() != cpu_to_node(cpu)) return true; @@ -365,6 +457,8 @@ static __always_inline bool steal_break(u32 val, int iters, bool paravirt) static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) { + bool seen_preempted = false; + bool sleepy = false; int iters = 0; u32 val; @@ -391,7 +485,25 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav preempted = yield_to_locked_owner(lock, val, paravirt); } + if (paravirt && pv_sleepy_lock) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + if (preempted) { + seen_preempted = true; + sleepy = true; if (!pv_spin_on_preempted_owner) iters++; /* @@ -405,7 +517,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav } else { iters++; } - } while (!steal_break(val, iters, paravirt)); + } while (!steal_break(val, iters, paravirt, sleepy)); spin_end(); @@ -417,6 +529,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b struct qnodes *qnodesp; struct qnode *next, *node; u32 val, old, tail; + bool seen_preempted = false; + bool sleepy = false; bool mustq = false; int idx; int set_yield_cpu = -1; @@ -461,8 +575,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b /* Wait for mcs node lock to be released */ spin_begin(); - while (!node->locked) - yield_to_prev(lock, node, old, paravirt); + while (!node->locked) { + if (yield_to_prev(lock, node, old, paravirt)) + seen_preempted = true; + } spin_end(); /* Clear out stale propagated yield_cpu */ @@ -472,8 +588,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b smp_rmb(); /* acquire barrier for the mcs lock */ } -again: /* We're at the head of the waitqueue, wait for the lock. 
*/ +again: spin_begin(); for (;;) { bool preempted; @@ -482,18 +598,40 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b if (!(val & _Q_LOCKED_VAL)) break; + if (paravirt && pv_sleepy_lock && maybe_stealers) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); preempted = yield_head_to_locked_owner(lock, val, paravirt); if (!maybe_stealers) continue; - if (preempted) { + + if (preempted) + seen_preempted = true; + + if (paravirt && preempted) { + sleepy = true; + if (!pv_spin_on_preempted_owner) iters++; } else { iters++; } - if (!mustq && iters >= get_head_spins(paravirt)) { + if (!mustq && iters >= get_head_spins(paravirt, sleepy)) { mustq = true; set_mustq(lock); val |= _Q_MUST_Q_VAL; @@ -690,6 +828,70 @@ static int pv_spin_on_preempted_owner_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n"); +static int pv_sleepy_lock_set(void *data, u64 val) +{ + pv_sleepy_lock = !!val; + + return 0; +} + +static int pv_sleepy_lock_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n"); + +static int pv_sleepy_lock_sticky_set(void *data, u64 val) +{ + pv_sleepy_lock_sticky = !!val; + + return 0; +} + +static int pv_sleepy_lock_sticky_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_sticky; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n"); + +static int pv_sleepy_lock_interval_ns_set(void *data, u64 val) +{ + pv_sleepy_lock_interval_ns = val; + + return 0; +} + +static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_interval_ns; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n"); + +static int pv_sleepy_lock_factor_set(void *data, u64 val) +{ + pv_sleepy_lock_factor = val; + + return 0; +} + +static int pv_sleepy_lock_factor_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_factor; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n"); + static int pv_yield_prev_set(void *data, u64 val) { pv_yield_prev = !!val; @@ -747,6 +949,10 @@ static __init int spinlock_debugfs_init(void) debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); + debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock); + debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky); + debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); + debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); 
debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); -- GitLab From 0b2199841a7952d01a717b465df028b40b2cf3e9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 26 Nov 2022 19:59:32 +1000 Subject: [PATCH 477/694] powerpc/qspinlock: add compile-time tuning adjustments This adds compile-time options that allow the EH lock hint bit to be enabled or disabled, and adds some new options that may or may not help matters. To help with experimentation and tuning. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221126095932.1234527-18-npiggin@gmail.com --- arch/powerpc/include/asm/qspinlock.h | 61 ++++++++++++++++++++++++++-- arch/powerpc/lib/qspinlock.c | 39 ++++++++++++++++-- 2 files changed, 94 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 93b1c976db8a5..28a53fb69b381 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -6,15 +6,68 @@ #include #include +#ifdef CONFIG_PPC64 +/* + * Use the EH=1 hint for accesses that result in the lock being acquired. + * The hardware is supposed to optimise this pattern by holding the lock + * cacheline longer, and releasing when a store to the same memory (the + * unlock) is performed. + */ +#define _Q_SPIN_EH_HINT 1 +#else +#define _Q_SPIN_EH_HINT 0 +#endif + /* * The trylock itself may steal. This makes trylocks slightly stronger, and - * might make spin locks slightly more efficient when stealing. + * makes locks slightly more efficient when stealing. * * This is compile-time, so if true then there may always be stealers, so the * nosteal paths become unused. */ #define _Q_SPIN_TRY_LOCK_STEAL 1 +/* + * Put a speculation barrier after testing the lock/node and finding it + * busy. Try to prevent pointless speculation in slow paths. + * + * Slows down the lockstorm microbenchmark with no stealing, where locking + * is purely FIFO through the queue. May have more benefit in real workload + * where speculating into the wrong place could have a greater cost. + */ +#define _Q_SPIN_SPEC_BARRIER 0 + +#ifdef CONFIG_PPC64 +/* + * Execute a miso instruction after passing the MCS lock ownership to the + * queue head. Miso is intended to make stores visible to other CPUs sooner. + * + * This seems to make the lockstorm microbenchmark nospin test go slightly + * faster on POWER10, but disable for now. + */ +#define _Q_SPIN_MISO 0 +#else +#define _Q_SPIN_MISO 0 +#endif + +#ifdef CONFIG_PPC64 +/* + * This executes miso after an unlock of the lock word, having ownership + * pass to the next CPU sooner. This will slow the uncontended path to some + * degree. Not evidence it helps yet. + */ +#define _Q_SPIN_MISO_UNLOCK 0 +#else +#define _Q_SPIN_MISO_UNLOCK 0 +#endif + +/* + * Seems to slow down lockstorm microbenchmark, suspect queue node just + * has to become shared again right afterwards when its waiter spins on + * the lock field. 
+ */ +#define _Q_SPIN_PREFETCH_NEXT 0 + static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { return READ_ONCE(lock->val); @@ -52,7 +105,7 @@ static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock) "2: \n" : "=&r" (prev) : "r" (&lock->val), "r" (new), - "i" (IS_ENABLED(CONFIG_PPC64)) + "i" (_Q_SPIN_EH_HINT) : "cr0", "memory"); return likely(prev == 0); @@ -76,7 +129,7 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) "2: \n" : "=&r" (prev), "=&r" (tmp) : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK), - "i" (IS_ENABLED(CONFIG_PPC64)) + "i" (_Q_SPIN_EH_HINT) : "cr0", "memory"); return likely(!(prev & ~_Q_TAIL_CPU_MASK)); @@ -101,6 +154,8 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static inline void queued_spin_unlock(struct qspinlock *lock) { smp_store_release(&lock->locked, 0); + if (_Q_SPIN_MISO_UNLOCK) + asm volatile("miso" ::: "memory"); } #define arch_spin_is_locked(l) queued_spin_is_locked(l) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 0f33a07c1d192..1cf5d3e752501 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -48,6 +48,12 @@ static bool pv_prod_head __read_mostly = false; static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); +#if _Q_SPIN_SPEC_BARRIER == 1 +#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0) +#else +#define spec_barrier() do { } while (0) +#endif + static __always_inline bool recently_sleepy(void) { /* pv_sleepy_lock is true when this is called */ @@ -137,7 +143,7 @@ static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) : "r" (&lock->val), "r"(tail), "r" (newval), "i" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK), - "i" (IS_ENABLED(CONFIG_PPC64)) + "i" (_Q_SPIN_EH_HINT) : "cr0", "memory"); return prev; @@ -475,6 +481,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav val = READ_ONCE(lock->val); if (val & _Q_MUST_Q_VAL) break; + spec_barrier(); if (unlikely(!(val & _Q_LOCKED_VAL))) { spin_end(); @@ -540,6 +547,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b qnodesp = this_cpu_ptr(&qnodes); if (unlikely(qnodesp->count >= MAX_NODES)) { + spec_barrier(); while (!queued_spin_trylock(lock)) cpu_relax(); return; @@ -576,9 +584,12 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b /* Wait for mcs node lock to be released */ spin_begin(); while (!node->locked) { + spec_barrier(); + if (yield_to_prev(lock, node, old, paravirt)) seen_preempted = true; } + spec_barrier(); spin_end(); /* Clear out stale propagated yield_cpu */ @@ -586,6 +597,17 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b node->yield_cpu = -1; smp_rmb(); /* acquire barrier for the mcs lock */ + + /* + * Generic qspinlocks have this prefetch here, but it seems + * like it could cause additional line transitions because + * the waiter will keep loading from it. + */ + if (_Q_SPIN_PREFETCH_NEXT) { + next = READ_ONCE(node->next); + if (next) + prefetchw(next); + } } /* We're at the head of the waitqueue, wait for the lock. 
*/ @@ -597,6 +619,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b val = READ_ONCE(lock->val); if (!(val & _Q_LOCKED_VAL)) break; + spec_barrier(); if (paravirt && pv_sleepy_lock && maybe_stealers) { if (!sleepy) { @@ -637,6 +660,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b val |= _Q_MUST_Q_VAL; } } + spec_barrier(); spin_end(); /* If we're the last queued, must clean up the tail. */ @@ -657,6 +681,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b cpu_relax(); spin_end(); } + spec_barrier(); /* * Unlock the next mcs waiter node. Release barrier is not required @@ -668,10 +693,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b if (paravirt && pv_prod_head) { int next_cpu = next->cpu; WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); if (vcpu_is_preempted(next_cpu)) prod_cpu(next_cpu); } else { WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); } release: @@ -686,12 +715,16 @@ void queued_spin_lock_slowpath(struct qspinlock *lock) * is passed as the paravirt argument to the functions. */ if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { - if (try_to_steal_lock(lock, true)) + if (try_to_steal_lock(lock, true)) { + spec_barrier(); return; + } queued_spin_lock_mcs_queue(lock, true); } else { - if (try_to_steal_lock(lock, false)) + if (try_to_steal_lock(lock, false)) { + spec_barrier(); return; + } queued_spin_lock_mcs_queue(lock, false); } } -- GitLab From c28c15b6d28a776538482101522cbcd9f906b15c Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Wed, 9 Nov 2022 15:51:11 +1100 Subject: [PATCH 478/694] powerpc/code-patching: Use temporary mm for Radix MMU x86 supports the notion of a temporary mm which restricts access to temporary PTEs to a single CPU. A temporary mm is useful for situations where a CPU needs to perform sensitive operations (such as patching a STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing said mappings to other CPUs. Another benefit is that other CPU TLBs do not need to be flushed when the temporary mm is torn down. Mappings in the temporary mm can be set in the userspace portion of the address-space. Interrupts must be disabled while the temporary mm is in use. HW breakpoints, which may have been set by userspace as watchpoints on addresses now within the temporary mm, are saved and disabled when loading the temporary mm. The HW breakpoints are restored when unloading the temporary mm. All HW breakpoints are indiscriminately disabled while the temporary mm is in use - this may include breakpoints set by perf. Use the `poking_init` init hook to prepare a temporary mm and patching address. Initialize the temporary mm using mm_alloc(). Choose a randomized patching address inside the temporary mm userspace address space. The patching address is randomized between PAGE_SIZE and DEFAULT_MAP_WINDOW-PAGE_SIZE. Bits of entropy with 64K page size on BOOK3S_64: bits of entropy = log2(DEFAULT_MAP_WINDOW_USER64 / PAGE_SIZE) PAGE_SIZE=64K, DEFAULT_MAP_WINDOW_USER64=128TB bits of entropy = log2(128TB / 64K) bits of entropy = 31 The upper limit is DEFAULT_MAP_WINDOW due to how the Book3s64 Hash MMU operates - by default the space above DEFAULT_MAP_WINDOW is not available. 
Currently the Hash MMU does not use a temporary mm so technically this upper limit isn't necessary; however, a larger randomization range does not further "harden" this overall approach and future work may introduce patching with a temporary mm on Hash as well. Randomization occurs only once during initialization for each CPU as it comes online. The patching page is mapped with PAGE_KERNEL to set EAA[0] for the PTE which ignores the AMR (so no need to unlock/lock KUAP) according to PowerISA v3.0b Figure 35 on Radix. Based on x86 implementation: commit 4fc19708b165 ("x86/alternatives: Initialize temporary mm for patching") and: commit b3fd8e83ada0 ("x86/alternatives: Use temporary mm for text poking") From: Benjamin Gray Synchronisation is done according to ISA 3.1B Book 3 Chapter 13 "Synchronization Requirements for Context Alterations". Switching the mm is a change to the PID, which requires a CSI before and after the change, and a hwsync between the last instruction that performs address translation for an associated storage access. Instruction fetch is an associated storage access, but the instruction address mappings are not being changed, so it should not matter which context they use. We must still perform a hwsync to guard arbitrary prior code that may have accessed a userspace address. TLB invalidation is local and VA specific. Local because only this core used the patching mm, and VA specific because we only care that the writable mapping is purged. Leaving the other mappings intact is more efficient, especially when performing many code patches in a row (e.g., as ftrace would). Signed-off-by: Christopher M. Riedl Signed-off-by: Benjamin Gray [mpe: Use mm_alloc() per 107b6828a7cd ("x86/mm: Use mm_alloc() in poking_init()")] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-9-bgray@linux.ibm.com --- arch/powerpc/lib/code-patching.c | 177 ++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3055eef7dcdcc..a1902241ff5dd 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -4,12 +4,17 @@ */ #include +#include +#include #include #include #include #include #include +#include +#include +#include #include #include #include @@ -42,11 +47,54 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) } #ifdef CONFIG_STRICT_KERNEL_RWX + static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm); +static DEFINE_PER_CPU(unsigned long, cpu_patching_addr); +static DEFINE_PER_CPU(pte_t *, cpu_patching_pte); static int map_patch_area(void *addr, unsigned long text_poke_addr); static void unmap_patch_area(unsigned long addr); +static bool mm_patch_enabled(void) +{ + return IS_ENABLED(CONFIG_SMP) && radix_enabled(); +} + +/* + * The following applies for Radix MMU. Hash MMU has different requirements, + * and so is not supported. + * + * Changing mm requires context synchronising instructions on both sides of + * the context switch, as well as a hwsync between the last instruction for + * which the address of an associated storage access was translated using + * the current context. + * + * switch_mm_irqs_off() performs an isync after the context switch. It is + * the responsibility of the caller to perform the CSI and hwsync before + * starting/stopping the temp mm. 
+ */ +static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm) +{ + struct mm_struct *orig_mm = current->active_mm; + + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(orig_mm, temp_mm, current); + + WARN_ON(!mm_is_thread_local(temp_mm)); + + suspend_breakpoints(); + return orig_mm; +} + +static void stop_using_temp_mm(struct mm_struct *temp_mm, + struct mm_struct *orig_mm) +{ + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(temp_mm, orig_mm, current); + restore_breakpoints(); +} + static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; @@ -79,14 +127,86 @@ static int text_area_cpu_down(unsigned int cpu) return 0; } +static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr) +{ + struct mmu_gather tlb; + + tlb_gather_mmu(&tlb, mm); + free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0); + mmput(mm); +} + +static int text_area_cpu_up_mm(unsigned int cpu) +{ + struct mm_struct *mm; + unsigned long addr; + pte_t *pte; + spinlock_t *ptl; + + mm = mm_alloc(); + if (WARN_ON(!mm)) + goto fail_no_mm; + + /* + * Choose a random page-aligned address from the interval + * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE]. + * The lower address bound is PAGE_SIZE to avoid the zero-page. + */ + addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT; + + /* + * PTE allocation uses GFP_KERNEL which means we need to + * pre-allocate the PTE here because we cannot do the + * allocation during patching when IRQs are disabled. + * + * Using get_locked_pte() to avoid open coding, the lock + * is unnecessary. + */ + pte = get_locked_pte(mm, addr, &ptl); + if (!pte) + goto fail_no_pte; + pte_unmap_unlock(pte, ptl); + + this_cpu_write(cpu_patching_mm, mm); + this_cpu_write(cpu_patching_addr, addr); + this_cpu_write(cpu_patching_pte, pte); + + return 0; + +fail_no_pte: + put_patching_mm(mm, addr); +fail_no_mm: + return -ENOMEM; +} + +static int text_area_cpu_down_mm(unsigned int cpu) +{ + put_patching_mm(this_cpu_read(cpu_patching_mm), + this_cpu_read(cpu_patching_addr)); + + this_cpu_write(cpu_patching_mm, NULL); + this_cpu_write(cpu_patching_addr, 0); + this_cpu_write(cpu_patching_pte, NULL); + + return 0; +} + static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); void __init poking_init(void) { - int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "powerpc/text_poke:online", - text_area_cpu_up, - text_area_cpu_down); + int ret; + + if (mm_patch_enabled()) + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke_mm:online", + text_area_cpu_up_mm, + text_area_cpu_down_mm); + else + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down); /* cpuhp_setup_state returns >= 0 on success */ if (WARN_ON(ret < 0)) @@ -148,6 +268,50 @@ static void unmap_patch_area(unsigned long addr) flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } +static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) +{ + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); + struct mm_struct *patching_mm; + struct mm_struct *orig_mm; + + patching_mm = __this_cpu_read(cpu_patching_mm); + pte = __this_cpu_read(cpu_patching_pte); + text_poke_addr = __this_cpu_read(cpu_patching_addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + 
asm volatile("ptesync": : :"memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_instruction(addr, instr, patch_addr); + + /* hwsync performed by __patch_instruction (sync) if successful */ + if (err) + mb(); /* sync */ + + /* context synchronisation performed by __patch_instruction (isync or exception) */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + return err; +} + static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) { int err; @@ -187,7 +351,10 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) return raw_patch_instruction(addr, instr); local_irq_save(flags); - err = __do_patch_instruction(addr, instr); + if (mm_patch_enabled()) + err = __do_patch_instruction_mm(addr, instr); + else + err = __do_patch_instruction(addr, instr); local_irq_restore(flags); return err; -- GitLab From 2f228ee1ade5d8d1f26cf94863a36c5693023c58 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 9 Nov 2022 15:51:12 +1100 Subject: [PATCH 479/694] powerpc/code-patching: Consolidate and cache per-cpu patching context With the temp mm context support, there are CPU local variables to hold the patch address and pte. Use these in the non-temp mm path as well instead of adding a level of indirection through the text_poke_area vm_struct and pointer chasing the pte. As both paths use these fields now, there is no need to let unreferenced variables be dropped by the compiler, so it is cleaner to merge them into a single context struct. This has the additional benefit of removing a redundant CPU local pointer, as only one of cpu_patching_mm / text_poke_area is ever used, while remaining well-typed. It also groups each CPU's data into a single cacheline. 
Signed-off-by: Benjamin Gray [mpe: Shorten name to 'area' as suggested by Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221109045112.187069-10-bgray@linux.ibm.com --- arch/powerpc/lib/code-patching.c | 49 +++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index a1902241ff5dd..5b8f87db12176 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -48,10 +48,16 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) #ifdef CONFIG_STRICT_KERNEL_RWX -static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); -static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm); -static DEFINE_PER_CPU(unsigned long, cpu_patching_addr); -static DEFINE_PER_CPU(pte_t *, cpu_patching_pte); +struct patch_context { + union { + struct vm_struct *area; + struct mm_struct *mm; + }; + unsigned long addr; + pte_t *pte; +}; + +static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); static int map_patch_area(void *addr, unsigned long text_poke_addr); static void unmap_patch_area(unsigned long addr); @@ -116,14 +122,19 @@ static int text_area_cpu_up(unsigned int cpu) unmap_patch_area(addr); - this_cpu_write(text_poke_area, area); + this_cpu_write(cpu_patching_context.area, area); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); return 0; } static int text_area_cpu_down(unsigned int cpu) { - free_vm_area(this_cpu_read(text_poke_area)); + free_vm_area(this_cpu_read(cpu_patching_context.area)); + this_cpu_write(cpu_patching_context.area, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); return 0; } @@ -167,9 +178,9 @@ static int text_area_cpu_up_mm(unsigned int cpu) goto fail_no_pte; pte_unmap_unlock(pte, ptl); - this_cpu_write(cpu_patching_mm, mm); - this_cpu_write(cpu_patching_addr, addr); - this_cpu_write(cpu_patching_pte, pte); + this_cpu_write(cpu_patching_context.mm, mm); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, pte); return 0; @@ -181,12 +192,12 @@ static int text_area_cpu_up_mm(unsigned int cpu) static int text_area_cpu_down_mm(unsigned int cpu) { - put_patching_mm(this_cpu_read(cpu_patching_mm), - this_cpu_read(cpu_patching_addr)); + put_patching_mm(this_cpu_read(cpu_patching_context.mm), + this_cpu_read(cpu_patching_context.addr)); - this_cpu_write(cpu_patching_mm, NULL); - this_cpu_write(cpu_patching_addr, 0); - this_cpu_write(cpu_patching_pte, NULL); + this_cpu_write(cpu_patching_context.mm, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); return 0; } @@ -278,9 +289,9 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) struct mm_struct *patching_mm; struct mm_struct *orig_mm; - patching_mm = __this_cpu_read(cpu_patching_mm); - pte = __this_cpu_read(cpu_patching_pte); - text_poke_addr = __this_cpu_read(cpu_patching_addr); + patching_mm = __this_cpu_read(cpu_patching_context.mm); + pte = __this_cpu_read(cpu_patching_context.pte); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); @@ -320,10 +331,10 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) pte_t *pte; unsigned long pfn = get_patch_pfn(addr); - text_poke_addr = 
(unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); - pte = virt_to_kpte(text_poke_addr); + pte = __this_cpu_read(cpu_patching_context.pte); __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) -- GitLab From f9231a996e229c13d23f907352c2cea84bd1c30a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 14:15:36 +1000 Subject: [PATCH 480/694] module: add module_elf_check_arch for module-specific checks The elf_check_arch() function is also used to test compatibility of usermode binaries. Kernel modules may have more specific requirements, for example powerpc would like to test for ABI version compatibility. Add a weak module_elf_check_arch() that defaults to true, and call it from elf_validity_check(). Signed-off-by: Jessica Yu [np: added changelog, adjust name, rebase] Acked-by: Luis Chamberlain Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041539.1742489-2-npiggin@gmail.com --- include/linux/moduleloader.h | 3 +++ kernel/module/main.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 9e09d11ffe5b3..7b4587a191895 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -13,6 +13,9 @@ * must be implemented by each architecture. */ +/* arch may override to do additional checking of ELF header architecture */ +bool module_elf_check_arch(Elf_Ehdr *hdr); + /* Adjust arch-specific sections. Return 0 on success. */ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, diff --git a/kernel/module/main.c b/kernel/module/main.c index d02d39c7174e1..7b3f6fb0d4282 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1674,6 +1674,11 @@ static int elf_validity_check(struct load_info *info) info->hdr->e_machine); goto no_exec; } + if (!module_elf_check_arch(info->hdr)) { + pr_err("Invalid module architecture in ELF header: %u\n", + info->hdr->e_machine); + goto no_exec; + } if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { pr_err("Invalid ELF section header size\n"); goto no_exec; @@ -2247,6 +2252,11 @@ static void flush_module_icache(const struct module *mod) (unsigned long)mod->core_layout.base + mod->core_layout.size); } +bool __weak module_elf_check_arch(Elf_Ehdr *hdr) +{ + return true; +} + int __weak module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstrings, -- GitLab From de3d098dd1fc635535e3689c5d4aa0684242adde Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 14:15:37 +1000 Subject: [PATCH 481/694] powerpc/64: Add module check for ELF ABI version Override the generic module ELF check to provide a check for the ELF ABI version. This becomes important if we allow big-endian ELF ABI V2 builds but it doesn't hurt to check now. 
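(Illustrative aside, not part of the patch: in the 64-bit PowerPC ELF ABIs the low two bits of e_flags (EF_PPC64_ABI in the userspace <elf.h>) carry the ABI level, which is what the check added below decodes:

	e_flags & 0x3 == 0	ABI unspecified (older ELFv1-era objects)
	e_flags & 0x3 == 1	ELFv1, function-descriptor ABI
	e_flags & 0x3 == 2	ELFv2

so an ELFv2 kernel accepts only level 2, while an ELFv1 kernel accepts 0 or 1.)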
Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041539.1742489-3-npiggin@gmail.com --- arch/powerpc/kernel/module_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 7e45dc98df8a1..ff045644f13ff 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -31,6 +31,16 @@ this, and makes other things simpler. Anton? --RR. */ +bool module_elf_check_arch(Elf_Ehdr *hdr) +{ + unsigned long abi_level = hdr->e_flags & 0x3; + + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) + return abi_level == 2; + else + return abi_level < 2; +} + #ifdef CONFIG_PPC64_ELF_ABI_V2 static func_desc_t func_desc(unsigned long addr) -- GitLab From 505ea33089dcfc3ee3201b0fcb94751165805413 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 14:15:38 +1000 Subject: [PATCH 482/694] powerpc/64: Add big-endian ELFv2 flavour to crypto VMX asm generation This allows asm generation for big-endian ELFv2 builds. Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041539.1742489-4-npiggin@gmail.com --- drivers/crypto/vmx/Makefile | 12 +++++++++++- drivers/crypto/vmx/ppc-xlate.pl | 10 ++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index 2560cfea1dec2..e33c7238e7f8b 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -2,8 +2,18 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o +ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) +override flavour := linux-ppc64le +else +ifdef CONFIG_PPC64_ELF_ABI_V2 +override flavour := linux-ppc64-elfv2 +else +override flavour := linux-ppc64 +endif +endif + quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@ + cmd_perl = $(PERL) $< $(flavour) > $@ targets += aesp8-ppc.S ghashp8-ppc.S diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl index 36db2ef09e5bf..b583898c11ae8 100644 --- a/drivers/crypto/vmx/ppc-xlate.pl +++ b/drivers/crypto/vmx/ppc-xlate.pl @@ -9,6 +9,8 @@ open STDOUT,">$output" || die "can't open $output: $!"; my %GLOBALS; my $dotinlocallabels=($flavour=~/linux/)?1:0; +my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0; +my $dotfunctions=($elfv2abi=~1)?0:1; ################################################################ # directives which need special treatment on different platforms @@ -40,7 +42,7 @@ my $globl = sub { }; my $text = sub { my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; - $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); + $ret = ".abiversion 2\n".$ret if ($elfv2abi); $ret; }; my $machine = sub { @@ -56,8 +58,8 @@ my $size = sub { if ($flavour =~ /linux/) { shift; my $name = shift; $name =~ s|^[\.\_]||; - my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; - $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); + my $ret = ".size $name,.-".($dotfunctions?".":"").$name; + $ret .= "\n.size .$name,.-.$name" if ($dotfunctions); $ret; } else @@ -142,7 +144,7 @@ my $vmr = sub { # Some ABIs specify vrsave, special-purpose register #256, as reserved # for system use. 
-my $no_vrsave = ($flavour =~ /linux-ppc64le/); +my $no_vrsave = ($elfv2abi); my $mtspr = sub { my ($f,$idx,$ra) = @_; if ($idx == 256 && $no_vrsave) { -- GitLab From 5017b45946722bdd20ac255c9ae7273b78d1f12e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2022 14:15:39 +1000 Subject: [PATCH 483/694] powerpc/64: Option to build big-endian with ELFv2 ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide an option to build big-endian kernels using the ELFv2 ABI. This works on GCC only for now. Clang is rumored to support this, but core build files need updating first, at least. This gives big-endian kernels useful advantages of the ELFv2 ABI, e.g., less stack usage, -mprofile-kernel support, better compatibility with eBPF tools. BE+ELFv2 is not officially supported by the GNU toolchain, but it works fine in testing and has been used by some userspace for some time (e.g., Void Linux). Tested-by: Michal Suchánek Reviewed-by: Segher Boessenkool Signed-off-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041539.1742489-5-npiggin@gmail.com --- arch/powerpc/Kconfig | 21 +++++++++++++++++++++ arch/powerpc/platforms/Kconfig.cputype | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index abaf1ef1795c0..1a134c9769f8b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 source "arch/powerpc/platforms/Kconfig.cputype" +config CC_HAS_ELFV2 + def_bool PPC64 && $(cc-option, -mabi=elfv2) + config 32BIT bool default y if PPC32 @@ -586,6 +589,24 @@ config KEXEC_FILE config ARCH_HAS_KEXEC_PURGATORY def_bool KEXEC_FILE +config PPC64_BIG_ENDIAN_ELF_ABI_V2 + bool "Build big-endian kernel using ELF ABI V2 (EXPERIMENTAL)" + depends on PPC64 && CPU_BIG_ENDIAN + depends on CC_HAS_ELFV2 + depends on LD_IS_BFD && LD_VERSION >= 22400 + default n + help + This builds the kernel image using the "Power Architecture 64-Bit ELF + V2 ABI Specification", which has a reduced stack overhead and faster + function calls. This internal kernel ABI option does not affect + userspace compatibility. + + The V2 ABI is standard for 64-bit little-endian, but for big-endian + it is less well tested by kernel and toolchain. However some distros + build userspace this way, and it can produce a functioning kernel. + + This requires GCC and binutils 2.24 or newer. + config RELOCATABLE bool "Build a relocatable kernel" depends on PPC64 || (FLATMEM && (44x || PPC_85xx)) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7bac213b4125d..9563336e3348f 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -580,10 +580,10 @@ config CPU_LITTLE_ENDIAN endchoice config PPC64_ELF_ABI_V1 - def_bool PPC64 && CPU_BIG_ENDIAN + def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2) config PPC64_ELF_ABI_V2 - def_bool PPC64 && CPU_LITTLE_ENDIAN + def_bool PPC64 && !PPC64_ELF_ABI_V1 config PPC64_BOOT_WRAPPER def_bool n -- GitLab From d6aee468e4ecbfec46a3eafae4d31d6efc0d4da4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:27 +1000 Subject: [PATCH 484/694] powerpc/64: Remove asm interrupt tracing call helpers These are now unused. Remove. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-3-npiggin@gmail.com --- arch/powerpc/include/asm/irqflags.h | 58 ----------------------------- 1 file changed, 58 deletions(-) diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 1a6c1ce17735a..47d46712928ac 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -11,64 +11,6 @@ */ #include -#else -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_IRQSOFF_TRACER -/* - * Since the ftrace irqsoff latency trace checks CALLER_ADDR1, - * which is the stack frame here, we need to force a stack frame - * in case we came from user space. - */ -#define TRACE_WITH_FRAME_BUFFER(func) \ - mflr r0; \ - stdu r1, -STACK_FRAME_OVERHEAD(r1); \ - std r0, 16(r1); \ - stdu r1, -STACK_FRAME_OVERHEAD(r1); \ - bl func; \ - ld r1, 0(r1); \ - ld r1, 0(r1); -#else -#define TRACE_WITH_FRAME_BUFFER(func) \ - bl func; -#endif - -/* - * These are calls to C code, so the caller must be prepared for volatiles to - * be clobbered. - */ -#define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on) -#define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off) - -/* - * This is used by assembly code to soft-disable interrupts first and - * reconcile irq state. - * - * NB: This may call C code, so the caller must be prepared for volatiles to - * be clobbered. - */ -#define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACAIRQSOFTMASK(r13); \ - lbz __rB,PACAIRQHAPPENED(r13); \ - andi. __rA,__rA,IRQS_DISABLED; \ - li __rA,IRQS_DISABLED; \ - ori __rB,__rB,PACA_IRQ_HARD_DIS; \ - stb __rB,PACAIRQHAPPENED(r13); \ - bne 44f; \ - stb __rA,PACAIRQSOFTMASK(r13); \ - TRACE_DISABLE_INTS; \ -44: - -#else -#define TRACE_ENABLE_INTS -#define TRACE_DISABLE_INTS - -#define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACAIRQHAPPENED(r13); \ - li __rB,IRQS_DISABLED; \ - ori __rA,__rA,PACA_IRQ_HARD_DIS; \ - stb __rB,PACAIRQSOFTMASK(r13); \ - stb __rA,PACAIRQHAPPENED(r13) -#endif #endif #endif -- GitLab From 32c5209214bd8d4f8c4e9d9b630ef4c671f58e79 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:28 +1000 Subject: [PATCH 485/694] powerpc/perf: callchain validate kernel stack pointer bounds The interrupt frame detection and loads from the hypothetical pt_regs are not bounds-checked. The next-frame validation only bounds-checks STACK_FRAME_OVERHEAD, which does not include the pt_regs. Add another test for this. The user could set r1 to be equal to the address matching the first interrupt frame - STACK_INT_FRAME_SIZE, which is in the previous page due to the kernel redzone, and induce the kernel to load the marker from there. Possibly this could cause a crash at least. If the user could induce the previous page to contain a valid marker, then it might be able to direct perf to read specific memory addresses in a way that could be transmitted back to the user in the perf data. 
Fixes: 20002ded4d93 ("perf_counter: powerpc: Add callchain support") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-4-npiggin@gmail.com --- arch/powerpc/perf/callchain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 082f6d0308a47..8718289c051dd 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -61,6 +61,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re next_sp = fp[0]; if (next_sp == sp + STACK_INT_FRAME_SIZE && + validate_sp(sp, current, STACK_INT_FRAME_SIZE) && fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { /* * This looks like an interrupt frame for an -- GitLab From bc0677363d0ffaec0c56685291e97b080116976c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:29 +1000 Subject: [PATCH 486/694] powerpc: Rearrange copy_thread child stack creation This makes it a bit clearer where the stack frame is created, and will allow easier use of some of the stack offset constants in a later change. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-5-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5265da2d80340..f93703ea4a127 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1755,13 +1755,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) klp_init_thread_info(p); + /* Create initial stack frame. */ + sp -= (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD); + ((unsigned long *)sp)[0] = 0; + /* Copy registers */ - sp -= sizeof(struct pt_regs); - childregs = (struct pt_regs *) sp; + childregs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); if (unlikely(args->fn)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gpr[1] = sp + sizeof(struct pt_regs); + childregs->gpr[1] = sp + (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD); /* function */ if (args->fn) childregs->gpr[14] = ppc_function_entry((void *)args->fn); @@ -1796,7 +1799,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) f = ret_from_fork; } childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); - sp -= STACK_FRAME_OVERHEAD; /* * The way this works is that at some point in the future @@ -1806,7 +1808,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ - ((unsigned long *)sp)[0] = 0; sp -= sizeof(struct pt_regs); kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; -- GitLab From baa49d81a94bb4170e7f2f4d97016772117d0f60 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:30 +1000 Subject: [PATCH 487/694] powerpc/pseries: hvcall stack frame overhead This call may use the min size stack frame. The scratch space used is in the caller's parameter area frame, not this function's frame. 
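(Illustrative aside, not part of the patch, with sizes as I read the 64-bit ELF ABIs and the kernel headers of this series:

	ELFv1: 48-byte fixed header + mandatory 64-byte parameter save area = 112 bytes (STACK_FRAME_OVERHEAD)
	ELFv2: 32-byte fixed header; parameter save area only when the callee needs one = 32 bytes (STACK_FRAME_MIN_SIZE)

The hcall tracepoint wrappers can therefore drop to STACK_FRAME_MIN_SIZE because, as the updated comment below notes, the STK_PARAM() scratch slots live in the caller's frame, which is guaranteed to provide them since these hcall entry points are variadic.)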
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-6-npiggin@gmail.com --- arch/powerpc/platforms/pseries/hvCall.S | 38 +++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 762eb15d3bd42..783c16ad648b8 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -27,7 +27,9 @@ hcall_tracepoint_refcount: /* * precall must preserve all registers. use unused STK_PARAM() - * areas to save snapshots and opcode. + * areas to save snapshots and opcode. STK_PARAM() in the caller's + * frame will be available even on ELFv2 because these are all + * variadic functions. */ #define HCALL_INST_PRECALL(FIRST_REG) \ mflr r0; \ @@ -41,29 +43,29 @@ hcall_tracepoint_refcount: std r10,STK_PARAM(R10)(r1); \ std r0,16(r1); \ addi r4,r1,STK_PARAM(FIRST_REG); \ - stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + stdu r1,-STACK_FRAME_MIN_SIZE(r1); \ bl __trace_hcall_entry; \ - ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ - ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \ - ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \ - ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \ - ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \ - ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \ - ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \ - ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1) + ld r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + ld r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1); \ + ld r5,STACK_FRAME_MIN_SIZE+STK_PARAM(R5)(r1); \ + ld r6,STACK_FRAME_MIN_SIZE+STK_PARAM(R6)(r1); \ + ld r7,STACK_FRAME_MIN_SIZE+STK_PARAM(R7)(r1); \ + ld r8,STACK_FRAME_MIN_SIZE+STK_PARAM(R8)(r1); \ + ld r9,STACK_FRAME_MIN_SIZE+STK_PARAM(R9)(r1); \ + ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R10)(r1) /* * postcall is performed immediately before function return which * allows liberal use of volatile registers. */ #define __HCALL_INST_POSTCALL \ - ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ - std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + ld r0,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + std r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ mr r4,r3; \ mr r3,r0; \ bl __trace_hcall_exit; \ - ld r0,STACK_FRAME_OVERHEAD+16(r1); \ - addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,STACK_FRAME_MIN_SIZE+16(r1); \ + addi r1,r1,STACK_FRAME_MIN_SIZE; \ ld r3,STK_PARAM(R3)(r1); \ mtlr r0 @@ -303,14 +305,14 @@ plpar_hcall9_trace: mr r7,r8 mr r8,r9 mr r9,r10 - ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1) - ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1) - ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1) + ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R11)(r1) + ld r11,STACK_FRAME_MIN_SIZE+STK_PARAM(R12)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R13)(r1) HVSC mr r0,r12 - ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) std r4,0(r12) std r5,8(r12) std r6,16(r12) -- GitLab From 37195b820d32c23bdefce3f460ed7de48a57e5e4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:31 +1000 Subject: [PATCH 488/694] powerpc: simplify ppc_save_regs Adjust the pt_regs pointer so the interrupt frame offsets can be used to save registers. 
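(Illustrative aside, not part of the patch: the GPR0/_NIP/_MSR constants generated by asm-offsets are pt_regs field offsets biased by the size of the frame header, so stepping the buffer pointer back by that bias makes the shared macros land on the caller's bare pt_regs:

	GPR0     = STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, gpr[0])
	GPR0(r3) = (regs - STACK_FRAME_OVERHEAD) + GPR0
	         = &regs->gpr[0]

which is why a single subi of STACK_FRAME_OVERHEAD replaces the long list of hand-counted N*SZL offsets; the next patch in the series renames this bias STACK_INT_FRAME_REGS.)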
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-7-npiggin@gmail.com --- arch/powerpc/kernel/ppc_save_regs.S | 57 ++++++++--------------------- 1 file changed, 15 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 2d4d21bb46a97..6e86f3bf46735 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -21,60 +21,33 @@ * different ABIs, though). */ _GLOBAL(ppc_save_regs) - PPC_STL r0,0*SZL(r3) + /* This allows stack frame accessor macros and offsets to be used */ + subi r3,r3,STACK_FRAME_OVERHEAD + PPC_STL r0,GPR0(r3) #ifdef CONFIG_PPC32 - stmw r2, 2*SZL(r3) + stmw r2,GPR2(r3) #else - PPC_STL r2,2*SZL(r3) - PPC_STL r3,3*SZL(r3) - PPC_STL r4,4*SZL(r3) - PPC_STL r5,5*SZL(r3) - PPC_STL r6,6*SZL(r3) - PPC_STL r7,7*SZL(r3) - PPC_STL r8,8*SZL(r3) - PPC_STL r9,9*SZL(r3) - PPC_STL r10,10*SZL(r3) - PPC_STL r11,11*SZL(r3) - PPC_STL r12,12*SZL(r3) - PPC_STL r13,13*SZL(r3) - PPC_STL r14,14*SZL(r3) - PPC_STL r15,15*SZL(r3) - PPC_STL r16,16*SZL(r3) - PPC_STL r17,17*SZL(r3) - PPC_STL r18,18*SZL(r3) - PPC_STL r19,19*SZL(r3) - PPC_STL r20,20*SZL(r3) - PPC_STL r21,21*SZL(r3) - PPC_STL r22,22*SZL(r3) - PPC_STL r23,23*SZL(r3) - PPC_STL r24,24*SZL(r3) - PPC_STL r25,25*SZL(r3) - PPC_STL r26,26*SZL(r3) - PPC_STL r27,27*SZL(r3) - PPC_STL r28,28*SZL(r3) - PPC_STL r29,29*SZL(r3) - PPC_STL r30,30*SZL(r3) - PPC_STL r31,31*SZL(r3) + SAVE_GPRS(2, 31, r3) lbz r0,PACAIRQSOFTMASK(r13) - PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,SOFTE(r3) #endif /* go up one stack frame for SP */ PPC_LL r4,0(r1) - PPC_STL r4,1*SZL(r3) + PPC_STL r4,GPR1(r3) /* get caller's LR */ PPC_LL r0,LRSAVE(r4) - PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_LINK(r3) mflr r0 - PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_NIP(r3) mfmsr r0 - PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_MSR(r3) mfctr r0 - PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CTR(r3) mfxer r0 - PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_XER(r3) mfcr r0 - PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CCR(r3) li r0,0 - PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3) - PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_TRAP(r3) + PPC_STL r0,ORIG_GPR3(r3) blr -- GitLab From c03be0a3f3cc656eab5c427b78959b8f1b169a11 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:32 +1000 Subject: [PATCH 489/694] powerpc: add definition for pt_regs offset within an interrupt frame This is a common offset that currently uses the overloaded STACK_FRAME_OVERHEAD constant. It's easier to read and more flexible to use a specific regs offset for this. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-8-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 2 + arch/powerpc/kernel/asm-offsets.c | 7 +- arch/powerpc/kernel/entry_32.S | 6 +- arch/powerpc/kernel/exceptions-64e.S | 42 +++++----- arch/powerpc/kernel/exceptions-64s.S | 80 +++++++++---------- arch/powerpc/kernel/head_32.h | 2 +- arch/powerpc/kernel/head_85xx.S | 4 +- arch/powerpc/kernel/head_booke.h | 2 +- arch/powerpc/kernel/interrupt_64.S | 22 ++--- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/optprobes_head.S | 4 +- arch/powerpc/kernel/ppc_save_regs.S | 2 +- arch/powerpc/kernel/process.c | 4 +- arch/powerpc/kernel/tm.S | 8 +- arch/powerpc/kernel/trace/ftrace_mprofile.S | 2 +- .../lib/test_emulate_step_exec_instr.S | 2 +- arch/powerpc/perf/callchain.c | 2 +- arch/powerpc/xmon/xmon.c | 7 +- 18 files changed, 100 insertions(+), 100 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 2efec6d87049e..a4ae67aa9b76d 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -124,6 +124,7 @@ struct pt_regs #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) +#define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_FRAME_MARKER 12 #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -143,6 +144,7 @@ struct pt_regs #define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_FRAME_MARKER 2 #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b4b661f631f5f..68905c9f7c21c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -72,7 +72,7 @@ #endif #define STACK_PT_REGS_OFFSET(sym, val) \ - DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) + DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val)) int main(void) { @@ -167,9 +167,8 @@ int main(void) OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr); OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr); - /* Local pt_regs on stack for Transactional Memory funcs. 
*/ - DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + - sizeof(struct pt_regs) + 16); + /* Local pt_regs on stack in int frame form, plus 16 bytes for TM */ + DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3fc7c9886bb70..24c8d84a56c96 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -123,12 +123,12 @@ transfer_to_syscall: kuep_lock /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 bl system_call_exception ret_from_syscall: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,0 bl syscall_exit_prepare #ifdef CONFIG_PPC_47x @@ -293,7 +293,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) .globl interrupt_return interrupt_return: lwz r4,_MSR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS andi. r0,r4,MSR_PR beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2f68fb2ee4fc3..62033d022e0a2 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -455,7 +455,7 @@ exc_##n##_bad_stack: \ EXCEPTION_COMMON(trapnum) \ ack(r8); \ CHECK_NAPPING(); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ + addi r3,r1,STACK_INT_FRAME_REGS; \ bl hdlr; \ b interrupt_return @@ -504,7 +504,7 @@ __end_interrupts: EXCEPTION_COMMON_CRIT(0x100) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -515,7 +515,7 @@ __end_interrupts: EXCEPTION_COMMON_MC(0x000) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception b ret_from_mc_except @@ -570,7 +570,7 @@ __end_interrupts: std r14,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) EXCEPTION_COMMON(0x700) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception REST_NVGPRS(r1) b interrupt_return @@ -586,7 +586,7 @@ __end_interrupts: beq- 1f bl load_up_fpu b fast_interrupt_return -1: addi r3,r1,STACK_FRAME_OVERHEAD +1: addi r3,r1,STACK_INT_FRAME_REGS bl kernel_fp_unavailable_exception b interrupt_return @@ -606,7 +606,7 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_exception b interrupt_return @@ -616,7 +616,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION bl altivec_assist_exception @@ -643,7 +643,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) EXCEPTION_COMMON_CRIT(0x9f0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_BOOKE_WDT bl WatchdogException #else @@ -664,7 +664,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -731,7 +731,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) 
EXCEPTION_COMMON_CRIT(0xd00) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -802,7 +802,7 @@ kernel_dbg_exc: ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) EXCEPTION_COMMON_DBG(0xd08) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -812,7 +812,7 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260) CHECK_NAPPING() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS /* * XXX: Returning from performance_monitor_exception taken as a * soft-NMI (Linux irqs disabled) may be risky to use interrupt_return @@ -834,7 +834,7 @@ kernel_dbg_exc: EXCEPTION_COMMON_CRIT(0x2a0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -846,7 +846,7 @@ kernel_dbg_exc: GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x2c0) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -857,7 +857,7 @@ kernel_dbg_exc: EXCEPTION_COMMON_CRIT(0x2e0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -866,7 +866,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -875,7 +875,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -884,7 +884,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -979,7 +979,7 @@ masked_interrupt_book3e_0x2c0: * original values stashed away in the PACA */ storage_fault_common: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_page_fault b interrupt_return @@ -988,7 +988,7 @@ storage_fault_common: * continues here. */ alignment_more: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception REST_NVGPRS(r1) b interrupt_return @@ -1069,7 +1069,7 @@ bad_stack_book3e: ZEROIZE_GPR(12) std r12,0(r11) LOAD_PACA_TOC() -1: addi r3,r1,STACK_FRAME_OVERHEAD +1: addi r3,r1,STACK_INT_FRAME_REGS bl kernel_bad_stack b 1b diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 651c36b056bde..29b78536ca597 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1061,7 +1061,7 @@ EXC_COMMON_BEGIN(system_reset_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY system_reset - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl system_reset_exception /* Clear MSR_RI before setting SRR0 and SRR1. 
*/ @@ -1208,7 +1208,7 @@ EXC_COMMON_BEGIN(machine_check_early_common) BEGIN_FTR_SECTION bl enable_machine_check END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS BEGIN_FTR_SECTION bl machine_check_early_boot END_FTR_SECTION(0, 1) // nop out after boot @@ -1298,7 +1298,7 @@ EXC_COMMON_BEGIN(machine_check_common) * save area: PACA_EXMC instead of PACA_EXGEN. */ GEN_COMMON machine_check - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception_async b interrupt_return_srr @@ -1364,14 +1364,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * This is the NMI version of the handler because we are called from * the early handler which is a true NMI. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception /* * We will not reach here. Even if we did, there is no way out. * Call unrecoverable_exception and die. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unrecoverable_exception b . @@ -1422,7 +1422,7 @@ EXC_VIRT_END(data_access, 0x4300, 0x80) EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS andis. r0,r4,DSISR_DABRMATCH@h bne- 1f #ifdef CONFIG_PPC_64S_HASH_MMU @@ -1479,7 +1479,7 @@ EXC_COMMON_BEGIN(data_access_slb_common) #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1493,7 +1493,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) li r3,-EFAULT #endif std r3,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_bad_segment_interrupt b interrupt_return_srr @@ -1525,7 +1525,7 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault @@ -1567,7 +1567,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common) #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1581,7 +1581,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) li r3,-EFAULT #endif std r3,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_bad_segment_interrupt b interrupt_return_srr @@ -1635,7 +1635,7 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_IRQ BEGIN_FTR_SECTION b interrupt_return_hsrr @@ -1665,7 +1665,7 @@ EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) EXC_VIRT_END(alignment, 0x4600, 0x100) EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1731,7 +1731,7 @@ EXC_COMMON_BEGIN(program_check_common) __GEN_COMMON_BODY program_check .Ldo_program_check: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return_srr @@ 
-1762,7 +1762,7 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl kernel_fp_unavailable_exception 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 @@ -1780,7 +1780,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl fp_unavailable_tm b interrupt_return_srr #endif @@ -1824,7 +1824,7 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl timer_interrupt b interrupt_return_srr @@ -1909,7 +1909,7 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else @@ -2076,7 +2076,7 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) EXC_VIRT_END(single_step, 0x4d00, 0x100) EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl single_step_exception b interrupt_return_srr @@ -2110,7 +2110,7 @@ EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS BEGIN_MMU_FTR_SECTION bl do_bad_page_fault_segv MMU_FTR_SECTION_ELSE @@ -2139,7 +2139,7 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return_hsrr @@ -2162,7 +2162,7 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl emulation_assist_interrupt REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return_hsrr @@ -2222,7 +2222,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) __GEN_COMMON_BODY hmi_exception_early - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl hmi_exception_realmode cmpdi cr0,r3,0 bne 1f @@ -2240,7 +2240,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl handle_hmi_exception b interrupt_return_hsrr @@ -2274,7 +2274,7 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else @@ -2310,7 +2310,7 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_IRQ b interrupt_return_hsrr @@ -2356,7 +2356,7 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) EXC_VIRT_END(performance_monitor, 
0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS lbz r4,PACAIRQSOFTMASK(r13) cmpdi r4,IRQS_ENABLED bne 1f @@ -2410,14 +2410,14 @@ BEGIN_FTR_SECTION b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_tm b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_exception b interrupt_return_srr @@ -2458,14 +2458,14 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl vsx_unavailable_tm b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl vsx_unavailable_exception b interrupt_return_srr @@ -2492,7 +2492,7 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2520,7 +2520,7 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ b interrupt_return_hsrr @@ -2550,7 +2550,7 @@ EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl cbe_system_error_exception b interrupt_return_hsrr @@ -2581,7 +2581,7 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl instruction_breakpoint_exception b interrupt_return_srr @@ -2703,7 +2703,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return_hsrr @@ -2720,7 +2720,7 @@ EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl cbe_maintenance_exception b interrupt_return_hsrr @@ -2745,7 +2745,7 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) EXC_VIRT_END(altivec_assist, 0x5700, 0x100) EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC bl altivec_assist_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ @@ -2767,7 +2767,7 @@ EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal - addi r3,r1,STACK_FRAME_OVERHEAD 
+ addi r3,r1,STACK_INT_FRAME_REGS bl cbe_thermal_exception b interrupt_return_hsrr @@ -2800,7 +2800,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl soft_nmi_interrupt /* Clear MSR_RI before setting SRR0 and SRR1. */ diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index c3286260a7d1c..117d25330e139 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -127,7 +127,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) mfspr r10,SPRN_XER addi r2, r2, -THREAD stw r10,_XER(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 52c0ab416326a..24f39abf81dff 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -972,10 +972,10 @@ _GLOBAL(__giveup_spe) li r4,THREAD_ACC evstddx evr6, r4, r3 /* save off accumulator */ beq 1f - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lwz r4,_MSR-STACK_INT_FRAME_REGS(r5) lis r3,MSR_SPE@h andc r4,r4,r3 /* disable SPE for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) + stw r4,_MSR-STACK_INT_FRAME_REGS(r5) 1: blr #endif /* CONFIG_SPE */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 1cb9d0f7cbf25..3149ac20b18e0 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -99,7 +99,7 @@ END_BTB_FLUSH_SECTION mfspr r10,SPRN_XER addi r2, r2, -THREAD stw r10,_XER(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index a019ed6fc8393..49d585eae7c8b 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -78,7 +78,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r12,_CCR(r1) std r3,ORIG_GPR3(r1) /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r11,-16(r3) /* "regshere" marker */ @@ -99,7 +99,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) bl system_call_exception .Lsyscall_vectored_\name\()_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,1 /* scv */ bl syscall_exit_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -176,7 +176,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() ld r3,RESULT(r1) - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl syscall_exit_restart @@ -251,7 +251,7 @@ END_BTB_FLUSH_SECTION std r12,_CCR(r1) std r3,ORIG_GPR3(r1) /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r11,-16(r3) /* "regshere" marker */ @@ -278,7 +278,7 @@ END_BTB_FLUSH_SECTION bl system_call_exception .Lsyscall_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,0 /* !scv */ bl syscall_exit_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -357,7 +357,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() ld r3,RESULT(r1) - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li 
r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl syscall_exit_restart @@ -388,7 +388,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) andi. r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return_srr - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unrecoverable_exception b . /* should not get here */ #else @@ -406,7 +406,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) beq interrupt_return_\srr\()_kernel interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl interrupt_exit_user_prepare cmpdi r3,0 bne- .Lrestore_nvgprs_\srr @@ -503,7 +503,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl interrupt_exit_user_restart @@ -518,7 +518,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr .balign IFETCH_ALIGN_BYTES interrupt_return_\srr\()_kernel: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl interrupt_exit_kernel_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -684,7 +684,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl interrupt_exit_kernel_restart diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 1a1e9995dae35..ebe4d1645ca1f 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -191,7 +191,7 @@ static int kgdb_break_match(struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + - STACK_FRAME_OVERHEAD); + STACK_INT_FRAME_REGS); unsigned long *ptr = gdb_regs; int reg; diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index cd4e7bc32609d..35932f45fb4ec 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -85,7 +85,7 @@ optprobe_template_op_address: TEMPLATE_FOR_IMM_LOAD_INSNS /* 2. pt_regs pointer in r4 */ - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS .global optprobe_template_call_handler optprobe_template_call_handler: @@ -96,7 +96,7 @@ optprobe_template_call_handler: * Parameters for instruction emulation: * 1. Pass SP in register r3. 
*/ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .global optprobe_template_insn optprobe_template_insn: diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 6e86f3bf46735..49813f9824681 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -22,7 +22,7 @@ */ _GLOBAL(ppc_save_regs) /* This allows stack frame accessor macros and offsets to be used */ - subi r3,r3,STACK_FRAME_OVERHEAD + subi r3,r3,STACK_INT_FRAME_REGS PPC_STL r0,GPR0(r3) #ifdef CONFIG_PPC32 stmw r2,GPR2(r3) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f93703ea4a127..d7a581997d926 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2260,12 +2260,12 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, /* * See if this is an exception frame. - * We look for the "regshere" marker in the current frame. + * We look for the "regs" marker in the current frame. */ if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) - (sp + STACK_FRAME_OVERHEAD); + (sp + STACK_INT_FRAME_REGS); lr = regs->link; printk("%s--- interrupt: %lx at %pS\n", diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 5a0f023a26e90..9feab5e0485bf 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -117,7 +117,7 @@ _GLOBAL(tm_reclaim) std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) - /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ + /* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */ std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) @@ -222,7 +222,7 @@ _GLOBAL(tm_reclaim) * Make r7 look like an exception frame so that we can use the neat * GPRx(n) macros. r7 is NOT a pt_regs ptr! */ - subi r7, r7, STACK_FRAME_OVERHEAD + subi r7, r7, STACK_INT_FRAME_REGS /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ @@ -359,7 +359,7 @@ _GLOBAL(__tm_recheckpoint) stdu r1, -TM_FRAME_SIZE(r1) /* - * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. + * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. * This is used for backing up the NVGPRs: */ SAVE_NVGPRS(r1) @@ -379,7 +379,7 @@ _GLOBAL(__tm_recheckpoint) * Make r7 look like an exception frame so that we can use the neat * GPRx(n) macros. r7 is now NOT a pt_regs ptr! */ - subi r7, r7, STACK_FRAME_OVERHEAD + subi r7, r7, STACK_INT_FRAME_REGS /* We need to setup MSR for FP/VMX/VSX register save instructions. 
*/ mfmsr r6 diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index d031093bc4367..ffb1db3868499 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -110,7 +110,7 @@ .endif /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD + addi r6, r1, STACK_INT_FRAME_REGS .endm .macro ftrace_regs_exit allregs diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 5473f9d03df3a..e2b646a4f7fa1 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -16,7 +16,7 @@ _GLOBAL(exec_instr) /* * Stack frame layout (INT_FRAME_SIZE bytes) - * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD) + * In-memory pt_regs (SP + STACK_INT_FRAME_REGS) * Scratch space (SP + 8) * Back chain (SP + 0) */ diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 8718289c051dd..9e254aed1f61a 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -67,7 +67,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re * This looks like an interrupt frame for an * interrupt that occurred in the kernel */ - regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + regs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); next_ip = regs->nip; lr = regs->link; level = 0; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e34d7809f6c9f..a14eb4d815c2f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1782,14 +1782,13 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr, xmon_print_symbol(ip, " ", "\n"); } - /* Look for "regshere" marker to see if this is + /* Look for "regs" marker to see if this is an exception frame. */ if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long)) && marker == STACK_FRAME_REGS_MARKER) { - if (mread(sp + STACK_FRAME_OVERHEAD, &regs, sizeof(regs)) - != sizeof(regs)) { + if (mread(sp + STACK_INT_FRAME_REGS, &regs, sizeof(regs)) != sizeof(regs)) { printf("Couldn't read registers at %lx\n", - sp + STACK_FRAME_OVERHEAD); + sp + STACK_INT_FRAME_REGS); break; } printf("--- Exception: %lx %s at ", regs.trap, -- GitLab From d2e8ff9f1492f44c5a6d93f759eea27574d753de Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:33 +1000 Subject: [PATCH 490/694] powerpc: add a definition for the marker offset within the interrupt frame Define a constant rather than open-code the offset for the "regs" marker.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-9-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 2 ++ arch/powerpc/kernel/entry_32.S | 2 +- arch/powerpc/kernel/exceptions-64e.S | 2 +- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kernel/head_32.h | 2 +- arch/powerpc/kernel/head_booke.h | 2 +- arch/powerpc/kernel/interrupt_64.S | 10 +++++----- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- 8 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index a4ae67aa9b76d..8a9f4cf8c4c58 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -125,6 +125,7 @@ struct pt_regs #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD +#define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) #define STACK_FRAME_MARKER 12 #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -145,6 +146,7 @@ struct pt_regs #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD +#define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) #define STACK_FRAME_MARKER 2 #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 24c8d84a56c96..2f61b7d3677c1 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -114,7 +114,7 @@ transfer_to_syscall: addi r12,r12,STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r1) li r2, INTERRUPT_SYSCALL - stw r12,8(r1) + stw r12,STACK_INT_FRAME_MARKER(r1) stw r2,_TRAP(r1) SAVE_GPR(0, r1) SAVE_GPRS(3, 8, r1) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 62033d022e0a2..b9cec22df9f97 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -391,7 +391,7 @@ exc_##n##_common: \ std r10,_CCR(r1); /* store orig CR in stackframe */ \ std r9,GPR1(r1); /* store stack frame back link */ \ std r11,SOFTE(r1); /* and save it to stackframe */ \ - std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ std r0,RESULT(r1); /* clear regs->result */ \ SAVE_NVGPRS(r1); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 29b78536ca597..ac3b0580224ee 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -591,7 +591,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) li r10,0 LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r10,RESULT(r1) /* clear regs->result */ - std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ + std r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame */ .endm /* diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 117d25330e139..f8e2911478a79 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -112,7 +112,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r0,GPR0(r1) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r10,8(r1) + stw r10,STACK_INT_FRAME_MARKER(r1) li r10, \trapno stw r10,_TRAP(r1) SAVE_GPRS(3, 8, r1) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 
3149ac20b18e0..37d43c1726766 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -84,7 +84,7 @@ END_BTB_FLUSH_SECTION stw r0,GPR0(r1) lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10, r10, STACK_FRAME_REGS_MARKER@l - stw r10, 8(r1) + stw r10, STACK_INT_FRAME_MARKER(r1) li r10, \trapno stw r10,_TRAP(r1) SAVE_GPRS(3, 8, r1) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 49d585eae7c8b..321992c1c9f90 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -77,11 +77,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r11,_TRAP(r1) std r12,_CCR(r1) std r3,ORIG_GPR3(r1) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */ /* Calling convention has r3 = regs, r4 = orig r0 */ addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 - LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) - std r11,-16(r3) /* "regshere" marker */ BEGIN_FTR_SECTION HMT_MEDIUM @@ -250,11 +250,11 @@ END_BTB_FLUSH_SECTION std r11,_TRAP(r1) std r12,_CCR(r1) std r3,ORIG_GPR3(r1) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */ /* Calling convention has r3 = regs, r4 = orig r0 */ addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 - LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) - std r11,-16(r3) /* "regshere" marker */ #ifdef CONFIG_PPC_BOOK3S li r11,1 @@ -637,7 +637,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse * the reliable stack unwinder later on. Clear it. */ - std r0,STACK_FRAME_OVERHEAD-16(r1) + std r0,STACK_INT_FRAME_MARKER(r1) REST_GPRS(2, 5, r1) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 37f50861dd98f..a9e162a1deec9 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2729,7 +2729,7 @@ kvmppc_bad_host_intr: std r6, SOFTE(r1) LOAD_PACA_TOC() LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER) - std r3, STACK_FRAME_OVERHEAD-16(r1) + std r3, STACK_INT_FRAME_MARKER(r1) /* * XXX On POWER7 and POWER8, we just spin here since we don't -- GitLab From e856e336924b0ecd0b7058e65e6b3e7266ee0b95 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:34 +1000 Subject: [PATCH 491/694] powerpc: Rename STACK_FRAME_MARKER and derive it from frame offset This is a count of longs from the stack pointer to the regs marker. Rename it to make it more distinct from the other byte offsets. It can be derived from the byte offset definitions just added. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-10-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 4 ++-- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/stacktrace.c | 2 +- arch/powerpc/perf/callchain.c | 2 +- arch/powerpc/xmon/xmon.c | 3 +-- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 8a9f4cf8c4c58..fdd50648df56e 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -126,7 +126,6 @@ struct pt_regs STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) -#define STACK_FRAME_MARKER 12 #ifdef CONFIG_PPC64_ELF_ABI_V2 #define STACK_FRAME_MIN_SIZE 32 @@ -147,7 +146,6 @@ struct pt_regs #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) -#define STACK_FRAME_MARKER 2 #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD /* Size of stack frame allocated when calling signal handler. */ @@ -155,6 +153,8 @@ struct pt_regs #endif /* __powerpc64__ */ +#define STACK_INT_FRAME_MARKER_LONGS (STACK_INT_FRAME_MARKER/sizeof(long)) + #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d7a581997d926..6c0a3c664266c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2263,7 +2263,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, * We look for the "regs" marker in the current frame. */ if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) - && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_INT_FRAME_REGS); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index a2443d61728eb..7efa0ec9dd774 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -136,7 +136,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum /* Mark stacktraces with exception frames as unreliable. 
*/ if (sp <= stack_end - STACK_INT_FRAME_SIZE && - stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { return -EINVAL; } diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 9e254aed1f61a..b01497ed51738 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -62,7 +62,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re if (next_sp == sp + STACK_INT_FRAME_SIZE && validate_sp(sp, current, STACK_INT_FRAME_SIZE) && - fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { /* * This looks like an interrupt frame for an * interrupt that occurred in the kernel diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index a14eb4d815c2f..0da66bc4823d4 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1721,7 +1721,6 @@ static void get_function_bounds(unsigned long pc, unsigned long *startp, } #define LRSAVE_OFFSET (STACK_FRAME_LR_SAVE * sizeof(unsigned long)) -#define MARKER_OFFSET (STACK_FRAME_MARKER * sizeof(unsigned long)) static void xmon_show_stack(unsigned long sp, unsigned long lr, unsigned long pc) @@ -1784,7 +1783,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr, /* Look for "regs" marker to see if this is an exception frame. */ - if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long)) + if (mread(sp + STACK_INT_FRAME_MARKER, &marker, sizeof(unsigned long)) && marker == STACK_FRAME_REGS_MARKER) { if (mread(sp + STACK_INT_FRAME_REGS, &regs, sizeof(regs)) != sizeof(regs)) { printf("Couldn't read registers at %lx\n", -- GitLab From 1223e5a20f7fb3c31c91a328d1a04ed26d5e889b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:35 +1000 Subject: [PATCH 492/694] powerpc: add a define for the user interrupt frame size The user interrupt frame is a different size from the kernel frame, so give it its own name.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-11-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 6 +++--- arch/powerpc/kernel/process.c | 6 +++--- arch/powerpc/kernel/stacktrace.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index fdd50648df56e..705ce26ae887d 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -122,8 +122,7 @@ struct pt_regs #define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ -#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ - STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) @@ -143,7 +142,7 @@ struct pt_regs #define KERNEL_REDZONE_SIZE 0 #define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ -#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD @@ -153,6 +152,7 @@ struct pt_regs #endif /* __powerpc64__ */ +#define STACK_INT_FRAME_SIZE (KERNEL_REDZONE_SIZE + STACK_USER_INT_FRAME_SIZE) #define STACK_INT_FRAME_MARKER_LONGS (STACK_INT_FRAME_MARKER/sizeof(long)) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6c0a3c664266c..010a5ee746aef 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1756,15 +1756,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) klp_init_thread_info(p); /* Create initial stack frame. */ - sp -= (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD); + sp -= STACK_USER_INT_FRAME_SIZE; ((unsigned long *)sp)[0] = 0; /* Copy registers */ - childregs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); if (unlikely(args->fn)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gpr[1] = sp + (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD); + childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE; /* function */ if (args->fn) childregs->gpr[14] = ppc_function_entry((void *)args->fn); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 7efa0ec9dd774..453ac317a6cf5 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -77,7 +77,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum /* * For user tasks, this is the SP value loaded on * kernel entry, see "PACAKSAVE(r13)" in _switch() and - * system_call_common()/EXCEPTION_PROLOG_COMMON(). + * system_call_common(). * * Likewise for non-swapper kernel threads, * this also happens to be the top of the stack @@ -88,7 +88,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * an unreliable stack trace until it's been * _switch()'ed to for the first time. 
*/ - stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + stack_end -= STACK_USER_INT_FRAME_SIZE; } else { /* * idle tasks have a custom stack layout, -- GitLab From 6f291a03819e4051ebc870471d26915ef2e6ba31 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:36 +1000 Subject: [PATCH 493/694] powerpc: add a define for the switch frame size and regs offset This is open-coded in process.c, ppc32 uses a different define with the same value, and the C definition is name differently which makes it an extra indirection to grep for. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-12-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 6 ++++-- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_32.S | 6 +++--- arch/powerpc/kernel/process.c | 12 ++++++++---- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 705ce26ae887d..412ef07497752 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -97,8 +97,6 @@ struct pt_regs #endif -#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) - // Always displays as "REGS" in memory dumps #ifdef CONFIG_CPU_BIG_ENDIAN #define STACK_FRAME_REGS_MARKER ASM_CONST(0x52454753) @@ -125,6 +123,8 @@ struct pt_regs #define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC64_ELF_ABI_V2 #define STACK_FRAME_MIN_SIZE 32 @@ -146,6 +146,8 @@ struct pt_regs #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD /* Size of stack frame allocated when calling signal handler. 
*/ #define __SIGNAL_FRAMESIZE 64 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 68905c9f7c21c..d24a59a98c0c9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -260,7 +260,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); + DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 2f61b7d3677c1..6e99ec10be89b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -215,9 +215,9 @@ ret_from_kernel_thread: * in arch/ppc/kernel/process.c */ _GLOBAL(_switch) - stwu r1,-INT_FRAME_SIZE(r1) + stwu r1,-SWITCH_FRAME_SIZE(r1) mflr r0 - stw r0,INT_FRAME_SIZE+4(r1) + stw r0,SWITCH_FRAME_SIZE+4(r1) /* r3-r12 are caller saved -- Cort */ SAVE_NVGPRS(r1) stw r0,_NIP(r1) /* Return to switch caller */ @@ -248,7 +248,7 @@ _GLOBAL(_switch) lwz r4,_NIP(r1) /* Return to _switch caller in new task */ mtlr r4 - addi r1,r1,INT_FRAME_SIZE + addi r1,r1,SWITCH_FRAME_SIZE blr .globl fast_exception_return diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 010a5ee746aef..0cb5296c6c41e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1808,10 +1808,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ - sp -= sizeof(struct pt_regs); - kregs = (struct pt_regs *) sp; - sp -= STACK_FRAME_OVERHEAD; + sp -= STACK_SWITCH_FRAME_SIZE; + kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); p->thread.ksp = sp; + #ifdef CONFIG_HAVE_HW_BREAKPOINT for (i = 0; i < nr_wp_slots(); i++) p->thread.ptrace_bps[i] = NULL; @@ -2261,8 +2261,12 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, /* * See if this is an exception frame. * We look for the "regs" marker in the current frame. + * + * STACK_SWITCH_FRAME_SIZE being the smallest frame that + * could hold a pt_regs, if that does not fit then it can't + * have regs. */ - if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) + if (validate_sp(sp, tsk, STACK_SWITCH_FRAME_SIZE) && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_INT_FRAME_REGS); -- GitLab From 6895dfc0474170c492191c126fcfc420f7771a09 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:37 +1000 Subject: [PATCH 494/694] powerpc: copy_thread fill in interrupt frame marker and back chain Backtraces will not recognise the fork system call interrupt without the regs marker. And regular interrupt entry from userspace creates the back chain to the user stack, so do this for the initial fork frame too, to be consistent. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-13-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0cb5296c6c41e..6b1d80bd370ea 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1757,12 +1757,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Create initial stack frame. */ sp -= STACK_USER_INT_FRAME_SIZE; - ((unsigned long *)sp)[0] = 0; + *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; /* Copy registers */ childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); if (unlikely(args->fn)) { /* kernel thread */ + ((unsigned long *)sp)[0] = 0; memset(childregs, 0, sizeof(struct pt_regs)); childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE; /* function */ @@ -1782,6 +1783,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) *childregs = *regs; if (usp) childregs->gpr[1] = usp; + ((unsigned long *)sp)[0] = childregs->gpr[1]; p->thread.regs = childregs; /* 64s sets this in ret_from_fork */ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) -- GitLab From edbd0387f3249cc7e102f86d4852a9a9f3bb1305 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:38 +1000 Subject: [PATCH 495/694] powerpc: copy_thread add a back chain to the switch stack frame Stack unwinders need LR and the back chain as a minimum. The switch stack uses regs->nip for its return pointer rather than lrsave, so that was not set in the fork frame, and neither was the back chain. This change sets those fields in the stack. With this and the previous change, a stack trace in the switch or interrupt stack goes from looking like this: Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 3 PID: 90 Comm: systemd Not tainted NIP: c000000000011060 LR: c000000000010f68 CTR: 0000000000007fff [ ... regs ... ] NIP [c000000000011060] _switch+0x160/0x17c LR [c000000000010f68] _switch+0x68/0x17c Call Trace: To this: Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries CPU: 0 PID: 93 Comm: systemd Not tainted NIP: c000000000011060 LR: c000000000010f68 CTR: 0000000000007fff [ ... regs ... ] NIP [c000000000011060] _switch+0x160/0x17c LR [c000000000010f68] _switch+0x68/0x17c Call Trace: [c000000005a93e10] [c00000000000cdbc] ret_from_fork_scv+0x0/0x54 --- interrupt: 3000 at 0x7fffa72f56d8 NIP: 00007fffa72f56d8 LR: 0000000000000000 CTR: 0000000000000000 [ ... regs ... ] NIP [00007fffa72f56d8] 0x7fffa72f56d8 LR [0000000000000000] 0x0 --- interrupt: 3000 Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-14-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6b1d80bd370ea..096b6ea52378c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1810,7 +1810,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. 
*/ + ((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f; sp -= STACK_SWITCH_FRAME_SIZE; + ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE; kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); p->thread.ksp = sp; -- GitLab From 4cefb0f6c555971b3e6544a9b15470f9d1f12089 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:39 +1000 Subject: [PATCH 496/694] powerpc: split validate_sp into two functions Most callers just want to validate an arbitrary kernel stack pointer, some need a particular size. Make the size case the exceptional one with an extra function. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-15-npiggin@gmail.com --- arch/powerpc/include/asm/processor.h | 15 ++++++++++++--- arch/powerpc/kernel/process.c | 23 ++++++++++++++--------- arch/powerpc/kernel/stacktrace.c | 2 +- arch/powerpc/perf/callchain.c | 6 +++--- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 631802999d598..e96c9b8c2a60b 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -374,9 +374,18 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #endif -/* Check that a certain kernel stack pointer is valid in task_struct p */ -int validate_sp(unsigned long sp, struct task_struct *p, - unsigned long nbytes); +/* + * Check that a certain kernel stack pointer is a valid (minimum sized) + * stack frame in task_struct p. + */ +int validate_sp(unsigned long sp, struct task_struct *p); + +/* + * validate the stack frame of a particular minimum size, used for when we are + * looking at a certain object in the stack beyond the minimum. + */ +int validate_sp_size(unsigned long sp, struct task_struct *p, + unsigned long nbytes); /* * Prefetch macros. diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 096b6ea52378c..9446bee8ca32a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2157,9 +2157,12 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, return 0; } - -int validate_sp(unsigned long sp, struct task_struct *p, - unsigned long nbytes) +/* + * validate the stack frame of a particular minimum size, used for when we are + * looking at a certain object in the stack beyond the minimum. 
+ */ +int validate_sp_size(unsigned long sp, struct task_struct *p, + unsigned long nbytes) { unsigned long stack_page = (unsigned long)task_stack_page(p); @@ -2175,7 +2178,10 @@ int validate_sp(unsigned long sp, struct task_struct *p, return valid_emergency_stack(sp, p, nbytes); } -EXPORT_SYMBOL(validate_sp); +int validate_sp(unsigned long sp, struct task_struct *p) +{ + return validate_sp_size(sp, p, STACK_FRAME_OVERHEAD); +} static unsigned long ___get_wchan(struct task_struct *p) { @@ -2183,13 +2189,12 @@ static unsigned long ___get_wchan(struct task_struct *p) int count = 0; sp = p->thread.ksp; - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, p)) return 0; do { sp = READ_ONCE_NOCHECK(*(unsigned long *)sp); - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || - task_is_running(p)) + if (!validate_sp(sp, p) || task_is_running(p)) return 0; if (count > 0) { ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]); @@ -2243,7 +2248,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, lr = 0; printk("%sCall Trace:\n", loglvl); do { - if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, tsk)) break; stack = (unsigned long *) sp; @@ -2270,7 +2275,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, * could hold a pt_regs, if that does not fit then it can't * have regs. */ - if (validate_sp(sp, tsk, STACK_SWITCH_FRAME_SIZE) + if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE) && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_INT_FRAME_REGS); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 453ac317a6cf5..1dbbf30f265e0 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -43,7 +43,7 @@ void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; - if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, task)) return; newsp = stack[0]; diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index b01497ed51738..6b4434dd0ff30 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -27,7 +27,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) { if (sp & 0xf) return 0; /* must be 16-byte aligned */ - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, current)) return 0; if (sp >= prev_sp + STACK_FRAME_MIN_SIZE) return 1; @@ -53,7 +53,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re sp = regs->gpr[1]; perf_callchain_store(entry, perf_instruction_pointer(regs)); - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, current)) return; for (;;) { @@ -61,7 +61,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re next_sp = fp[0]; if (next_sp == sp + STACK_INT_FRAME_SIZE && - validate_sp(sp, current, STACK_INT_FRAME_SIZE) && + validate_sp_size(sp, current, STACK_INT_FRAME_SIZE) && fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { /* * This looks like an interrupt frame for an -- GitLab From 90f1b43196c5e79f6c986a359011a19857984c27 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:40 +1000 Subject: [PATCH 497/694] powerpc: allow minimum sized kernel stack frames This affects only 64-bit ELFv2 kernels, and reduces the minimum asm-created stack frame size from 
112 to 32 byte on those kernels. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-16-npiggin@gmail.com --- arch/powerpc/kernel/head_40x.S | 2 +- arch/powerpc/kernel/head_44x.S | 6 +++--- arch/powerpc/kernel/head_64.S | 6 +++--- arch/powerpc/kernel/head_85xx.S | 4 ++-- arch/powerpc/kernel/head_8xx.S | 2 +- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- arch/powerpc/kernel/irq.c | 4 ++-- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/misc_64.S | 4 ++-- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/kernel/stacktrace.c | 2 +- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 088f500896c78..918547b93b5e9 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -602,7 +602,7 @@ start_here: lis r1,init_thread_union@ha addi r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) bl early_init /* We have to do this with MMU on */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index f15cb9fdb692f..63a85c16fef46 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -109,7 +109,7 @@ _GLOBAL(_start); lis r1,init_thread_union@h ori r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) bl early_init @@ -1012,7 +1012,7 @@ _GLOBAL(start_secondary_47x) */ lis r1,temp_boot_stack@h ori r1,r1,temp_boot_stack@l - addi r1,r1,1024-STACK_FRAME_OVERHEAD + addi r1,r1,1024-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) bl mmu_init_secondary @@ -1025,7 +1025,7 @@ _GLOBAL(start_secondary_47x) lwz r1,TASK_STACK(r2) /* Current stack pointer */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index dedcc6fe2263a..b513d13bf79e4 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -424,7 +424,7 @@ generic_secondary_common_init: /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + subi r1,r1,STACK_FRAME_MIN_SIZE /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) @@ -780,7 +780,7 @@ _GLOBAL(pmac_secondary_start) /* Create a temp kernel stack for use before relocation is on. 
*/ ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + subi r1,r1,STACK_FRAME_MIN_SIZE b __secondary_start @@ -958,7 +958,7 @@ start_here_multiplatform: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 - stdu r0,-STACK_FRAME_OVERHEAD(r1) + stdu r0,-STACK_FRAME_MIN_SIZE(r1) /* * Do very early kernel initializations, including initial hash table diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 24f39abf81dff..d9bd377dec919 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -229,7 +229,7 @@ set_ivor: lis r1,init_thread_union@h ori r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) #ifdef CONFIG_SMP stw r24, TASK_CPU(r2) @@ -1044,7 +1044,7 @@ __secondary_start: lwz r1,TASK_STACK(r2) /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0b05f2be66b9f..cf546d0e5c403 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -537,7 +537,7 @@ start_here: ori r0, r0, STACK_END_MAGIC@l stw r0, 0(r1) li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) lis r6, swapper_pg_dir@ha tophys(r6,r6) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 519b606951675..40854d092dd31 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -840,7 +840,7 @@ __secondary_start: lwz r1,TASK_STACK(r1) /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 tophys(r3,r1) stw r0,0(r3) @@ -966,7 +966,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) lis r1,init_thread_union@ha addi r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) /* * Do early platform-specific initialization, * and set up the MMU. 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9ede61a5a469e..c5b9ce8874834 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -210,7 +210,7 @@ static __always_inline void call_do_softirq(const void *sp) PPC_LL " %%r1, 0(%%r1) ;" : // Outputs : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE), [callee] "i" (__do_softirq) : // Clobbers "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", @@ -264,7 +264,7 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) : // Outputs "+r" (r3) : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE), [callee] "i" (__do_irq) : // Clobbers "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e5127b19fec29..daf8f87d23728 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -382,7 +382,7 @@ EXPORT_SYMBOL(__bswapdi2) _GLOBAL(start_secondary_resume) /* Reset stack */ rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r3,0 stw r3,0(r1) /* Zero the stack frame pointer */ bl start_secondary diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 36184cada00b1..4bb6dd30c5568 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -384,7 +384,7 @@ _GLOBAL(kexec_sequence) std r0,16(r1) /* switch stacks to newstack -- &kexec_stack.stack */ - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + stdu r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3) mr r1,r3 li r0,0 @@ -401,7 +401,7 @@ _GLOBAL(kexec_sequence) std r26,-48(r1) std r25,-56(r1) - stdu r1,-STACK_FRAME_OVERHEAD-64(r1) + stdu r1,-STACK_FRAME_MIN_SIZE-64(r1) /* save args into preserved regs */ mr r31,r3 /* newstack (both) */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9446bee8ca32a..edb46d0806ef7 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2180,7 +2180,7 @@ int validate_sp_size(unsigned long sp, struct task_struct *p, int validate_sp(unsigned long sp, struct task_struct *p) { - return validate_sp_size(sp, p, STACK_FRAME_OVERHEAD); + return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE); } static unsigned long ___get_wchan(struct task_struct *p) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0da6e59161cd4..6b90f10a6c819 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1249,7 +1249,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_PPC64 paca_ptrs[cpu]->__current = idle; paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) + - THREAD_SIZE - STACK_FRAME_OVERHEAD; + THREAD_SIZE - STACK_FRAME_MIN_SIZE; #endif task_thread_info(idle)->cpu = cpu; secondary_current = current_set[cpu] = idle; diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 1dbbf30f265e0..5de8597eaab8d 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -94,7 +94,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * idle tasks have a custom stack layout, * c.f. cpu_idle_thread_init(). 
*/ - stack_end -= STACK_FRAME_OVERHEAD; + stack_end -= STACK_FRAME_MIN_SIZE; } if (task == current) -- GitLab From cd52414d5a6ccea6ce956ef05161fe824522a107 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:41 +1000 Subject: [PATCH 498/694] powerpc/64: ELFv2 use minimal stack frames in int and switch frame sizes Adjust the ELFv2 interrupt and switch frames to the minimum C ABI size, plus pt_regs, plus 16 bytes for the aligned regs marker for the int frame (and the switch frame needs to match that because it uses the same regs offset as the int frame). This saves 80 bytes of kernel stack per interrupt. It's the principle of getting our accounting right that's more important than the practical saving. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-17-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 412ef07497752..4ab606f390bc4 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -120,16 +120,26 @@ struct pt_regs #define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ + +#ifdef CONFIG_PPC64_ELF_ABI_V2 +#define STACK_FRAME_MIN_SIZE 32 +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16) +#define STACK_INT_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16) +#define STACK_INT_FRAME_MARKER STACK_FRAME_MIN_SIZE +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16) +#define STACK_SWITCH_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16) +#else +/* + * The ELFv1 ABI specifies 48 bytes plus a minimum 64 byte parameter save + * area. This parameter area is not used by calls to C from interrupt entry, + * so the second from last one of those is used for the frame marker. + */ +#define STACK_FRAME_MIN_SIZE 112 #define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD #define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) #define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD - -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#define STACK_FRAME_MIN_SIZE 32 -#else -#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD #endif /* Size of dummy stack frame allocated when calling signal handler. */ -- GitLab From dfecd06bc5524517ed7737c30eaaf747338b280a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2022 22:49:42 +1000 Subject: [PATCH 499/694] powerpc: remove STACK_FRAME_OVERHEAD This is equal to STACK_FRAME_MIN_SIZE on 32-bit and 64-bit ELFv1, and no longer used in 64-bit ELFv2, so replace STACK_FRAME_OVERHEAD occurrences with STACK_FRAME_MIN_SIZE. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221127124942.1665522-18-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 4ab606f390bc4..0eb90a0133466 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -118,7 +118,6 @@ struct pt_regs #define USER_REDZONE_SIZE 512 #define KERNEL_REDZONE_SIZE 288 -#define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -135,11 +134,11 @@ struct pt_regs * so the second from last one of those is used for the frame marker. */ #define STACK_FRAME_MIN_SIZE 112 -#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD -#define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 16) -#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE +#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 16) +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE #endif /* Size of dummy stack frame allocated when calling signal handler. */ @@ -150,14 +149,13 @@ struct pt_regs #define USER_REDZONE_SIZE 0 #define KERNEL_REDZONE_SIZE 0 -#define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ +#define STACK_FRAME_MIN_SIZE 16 #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ -#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_INT_FRAME_REGS STACK_FRAME_OVERHEAD -#define STACK_INT_FRAME_MARKER (STACK_FRAME_OVERHEAD - 8) -#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD -#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_SWITCH_FRAME_REGS STACK_FRAME_OVERHEAD +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE +#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 8) +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 64 -- GitLab From 6b34a099faa123488b13caf704562f4dbe483fc4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 24 Oct 2022 13:01:50 +1000 Subject: [PATCH 500/694] powerpc/64s/hash: add stress_hpt kernel boot option to increase hash faults This option increases the number of hash misses by limiting the number of kernel HPT entries, by keeping a per-CPU record of the last kernel HPTEs installed, and removing that from the hash table on the next hash insertion. A timer round-robins CPUs removing remaining kernel HPTEs and clearing the TLB (in the case of bare metal) to increase and slightly randomise kernel fault activity. 
Signed-off-by: Nicholas Piggin [mpe: Add comment about NR_CPUS usage, fixup whitespace] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221024030150.852517-1-npiggin@gmail.com --- .../admin-guide/kernel-parameters.txt | 5 + arch/powerpc/mm/book3s64/hash_4k.c | 5 + arch/powerpc/mm/book3s64/hash_64k.c | 10 ++ arch/powerpc/mm/book3s64/hash_utils.c | 130 +++++++++++++++++- arch/powerpc/mm/book3s64/internal.h | 11 ++ 5 files changed, 160 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a465d5242774a..9f3d256529d01 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1042,6 +1042,11 @@ them frequently to increase the rate of SLB faults on kernel addresses. + stress_hpt [PPC] + Limits the number of kernel HPT entries in the hash + page table to increase the rate of hash page table + faults on kernel addresses. + disable= [IPV6] See Documentation/networking/ipv6.rst. diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 7de1a8a0c62a8..02acbfd05b460 100644 --- a/arch/powerpc/mm/book3s64/hash_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -16,6 +16,8 @@ #include #include +#include "internal.h" + int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpg_prot) @@ -118,6 +120,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); + + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 998c6817ed47b..954af420f3586 100644 --- a/arch/powerpc/mm/book3s64/hash_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -16,6 +16,8 @@ #include #include +#include "internal.h" + /* * Return true, if the entry has a slot value which * the software considers as invalid. 
@@ -216,6 +218,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE); new_pte |= H_PAGE_HASHPTE; + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } @@ -327,7 +332,12 @@ int __hash_page_64K(unsigned long ea, unsigned long access, new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); + + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); } + *ptep = __pte(new_pte & ~H_PAGE_BUSY); + return 0; } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6df4c6d38b660..80a148c57de81 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, return ret; } -static bool disable_1tb_segments = false; +static bool disable_1tb_segments __ro_after_init; static int __init parse_disable_1tb_segments(char *p) { @@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p) } early_param("disable_1tb_segments", parse_disable_1tb_segments); +bool stress_hpt_enabled __initdata; + +static int __init parse_stress_hpt(char *p) +{ + stress_hpt_enabled = true; + return 0; +} +early_param("stress_hpt", parse_stress_hpt); + +__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key); + +/* + * per-CPU array allocated if we enable stress_hpt. + */ +#define STRESS_MAX_GROUPS 16 +struct stress_hpt_struct { + unsigned long last_group[STRESS_MAX_GROUPS]; +}; + +static inline int stress_nr_groups(void) +{ + /* + * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries + * to allow practical forward progress. Bare metal returns 1, which + * seems to help uncover more bugs. + */ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return STRESS_MAX_GROUPS; + else + return 1; +} + +static struct stress_hpt_struct *stress_hpt_struct; + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, pr_info("Partition table %p\n", partition_tb); } +void hpt_clear_stress(void); +static struct timer_list stress_hpt_timer; +void stress_hpt_timer_fn(struct timer_list *timer) +{ + int next_cpu; + + hpt_clear_stress(); + if (!firmware_has_feature(FW_FEATURE_LPAR)) + tlbiel_all(); + + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer_on(&stress_hpt_timer, next_cpu); +} + static void __init htab_initialize(void) { unsigned long table; @@ -995,6 +1046,20 @@ static void __init htab_initialize(void) if (stress_slb_enabled) static_branch_enable(&stress_slb_key); + if (stress_hpt_enabled) { + unsigned long tmp; + static_branch_enable(&stress_hpt_key); + // Too early to use nr_cpu_ids, so use NR_CPUS + tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS, + 0, 0, MEMBLOCK_ALLOC_ANYWHERE); + memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS); + stress_hpt_struct = __va(tmp); + + timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0); + stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer(&stress_hpt_timer); + } + /* * Calculate the required size of the htab. 
We want the number of * PTEGs to equal one half the number of real pages. @@ -1980,6 +2045,69 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn, return slot; } +void hpt_clear_stress(void) +{ + int cpu = raw_smp_processor_id(); + int g; + + for (g = 0; g < stress_nr_groups(); g++) { + unsigned long last_group; + last_group = stress_hpt_struct[cpu].last_group[g]; + + if (last_group != -1UL) { + int i; + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (mmu_hash_ops.hpte_remove(last_group) == -1) + break; + } + stress_hpt_struct[cpu].last_group[g] = -1; + } + } +} + +void hpt_do_stress(unsigned long ea, unsigned long hpte_group) +{ + unsigned long last_group; + int cpu = raw_smp_processor_id(); + + last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1]; + if (hpte_group == last_group) + return; + + if (last_group != -1UL) { + int i; + /* + * Concurrent CPUs might be inserting into this group, so + * give up after a number of iterations, to prevent a live + * lock. + */ + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (mmu_hash_ops.hpte_remove(last_group) == -1) + break; + } + stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1; + } + + if (ea >= PAGE_OFFSET) { + /* + * We would really like to prefetch to get the TLB loaded, then + * remove the PTE before returning from fault interrupt, to + * increase the hash fault rate. + * + * Unfortunately QEMU TCG does not model the TLB in a way that + * makes this possible, and systemsim (mambo) emulator does not + * bring in TLBs with prefetches (although loads/stores do + * work for non-CI PTEs). + * + * So remember this PTE and clear it on the next hash fault. + */ + memmove(&stress_hpt_struct[cpu].last_group[1], + &stress_hpt_struct[cpu].last_group[0], + (stress_nr_groups() - 1) * sizeof(unsigned long)); + stress_hpt_struct[cpu].last_group[0] = hpte_group; + } +} + #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index 5045048ce244d..a57a25f06a215 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -13,6 +13,17 @@ static inline bool stress_slb(void) return static_branch_unlikely(&stress_slb_key); } +extern bool stress_hpt_enabled; + +DECLARE_STATIC_KEY_FALSE(stress_hpt_key); + +static inline bool stress_hpt(void) +{ + return static_branch_unlikely(&stress_hpt_key); +} + +void hpt_do_stress(unsigned long ea, unsigned long hpte_group); + void slb_setup_new_exec(void); void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); -- GitLab From 5921eb36d2a1b276b16a24e529788550e6a65449 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 1 Dec 2022 10:49:57 +0800 Subject: [PATCH 501/694] selftests: powerpc: Use "grep -E" instead of "egrep" The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this using "grep -E" instead. 
sed -i "s/egrep/grep -E/g" `grep egrep -rwl tools/testing/selftests/powerpc` Here are the steps to install the latest grep: wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz tar xf grep-3.8.tar.gz cd grep-3.8 && ./configure && make sudo make install export PATH=/usr/local/bin:$PATH Signed-off-by: Tiezhu Yang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1669862997-31335-1-git-send-email-yangtiezhu@loongson.cn --- tools/testing/selftests/powerpc/scripts/hmi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh index dcdb392e84275..bcc7b6b650099 100755 --- a/tools/testing/selftests/powerpc/scripts/hmi.sh +++ b/tools/testing/selftests/powerpc/scripts/hmi.sh @@ -36,7 +36,7 @@ trap "ppc64_cpu --smt-snooze-delay=100" 0 1 # for each chip+core combination # todo - less fragile parsing -egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog | +grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog | while read chipcore; do chip=$(echo "$chipcore"|awk '{print $3}') core=$(echo "$chipcore"|awk '{print $5}') -- GitLab From aecfd680099ba518c34dff2941017c5aa97def52 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Mon, 28 Nov 2022 15:19:42 +1100 Subject: [PATCH 502/694] selftests/powerpc: Use mfspr/mtspr macros No need to write inline asm for mtspr/mfspr, we have macros for this in reg.h Signed-off-by: Benjamin Gray Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041948.58339-2-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/dscr/dscr.h | 17 +++++------------ .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 6 ++---- tools/testing/selftests/powerpc/ptrace/ptrace.h | 5 +---- .../selftests/powerpc/security/flush_utils.c | 3 ++- 4 files changed, 10 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h index 13e9b9e28e2c7..b703714e7d98c 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr.h +++ b/tools/testing/selftests/powerpc/dscr/dscr.h @@ -23,6 +23,7 @@ #include #include +#include "reg.h" #include "utils.h" #define THREADS 100 /* Max threads */ @@ -41,31 +42,23 @@ /* Prilvilege state DSCR access */ inline unsigned long get_dscr(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR_PRIV)); - - return ret; + return mfspr(SPRN_DSCR_PRIV); } inline void set_dscr(unsigned long val) { - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_PRIV)); + mtspr(SPRN_DSCR_PRIV, val); } /* Problem state DSCR access */ inline unsigned long get_dscr_usr(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR)); - - return ret; + return mfspr(SPRN_DSCR); } inline void set_dscr_usr(unsigned long val) { - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); + mtspr(SPRN_DSCR, val); } /* Default DSCR access */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index a0635a3819aa4..1345e9b9af0fb 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -23,6 +23,7 @@ #include #include #include "ptrace.h" +#include "reg.h" #define SPRN_PVR 0x11F #define PVR_8xx 0x00500000 @@ -620,10 +621,7 @@ static int ptrace_hwbreak(void) int main(int argc, char **argv, char **envp) { - int pvr = 0; - asm 
__volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR)); - if (pvr == PVR_8xx) - is_8xx = true; + is_8xx = mfspr(SPRN_PVR) == PVR_8xx; return test_harness(ptrace_hwbreak, "ptrace-hwbreak"); } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h index 4e0233c0f2b3a..04788e5fc5040 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h @@ -745,10 +745,7 @@ int show_tm_spr(pid_t child, struct tm_spr_regs *out) /* Analyse TEXASR after TM failure */ inline unsigned long get_tfiar(void) { - unsigned long ret; - - asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_TFIAR)); - return ret; + return mfspr(SPRN_TFIAR); } void analyse_texasr(unsigned long texasr) diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c index 4d95965cb751f..9c5c00e04f633 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.c +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -14,6 +14,7 @@ #include #include #include +#include "reg.h" #include "utils.h" #include "flush_utils.h" @@ -79,5 +80,5 @@ void set_dscr(unsigned long val) init = 1; } - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); + mtspr(SPRN_DSCR, val); } -- GitLab From 94ba4f2c33f42dae7813dc169a177e922a39560c Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Mon, 28 Nov 2022 15:19:43 +1100 Subject: [PATCH 503/694] selftests/powerpc: Add ptrace setup_core_pattern() null-terminator - malloc() does not zero the buffer, - fread() does not null-terminate it's output, - `cat /proc/sys/kernel/core_pattern | hexdump -C` shows the file is not inherently null-terminated So using string operations on the buffer is risky. Explicitly add a null character to the end to make it safer. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221128041948.58339-3-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index 1a70a96f0bfe7..4e8d0ce1ff58f 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -383,7 +383,7 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_) goto out; } - ret = fread(core_pattern, 1, PATH_MAX, f); + ret = fread(core_pattern, 1, PATH_MAX - 1, f); fclose(f); if (!ret) { perror("Error reading core_pattern file"); @@ -391,6 +391,8 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_) goto out; } + core_pattern[ret] = '\0'; + /* Check whether we can predict the name of the core file. */ if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p")) *changed_ = false; -- GitLab From 4d0eea415216fe3791da2f65eb41399e70c7bedf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 29 Oct 2022 19:16:26 +0800 Subject: [PATCH 504/694] powerpc/83xx/mpc832x_rdb: call platform_device_put() in error case in of_fsl_spi_probe() If platform_device_add() is not called or failed, it can not call platform_device_del() to clean up memory, it should call platform_device_put() in error case. 
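For context, the refcounting rule behind this fix can be sketched as below; the
function and device name are made up purely for illustration and are not part of
the patch:

        #include <linux/platform_device.h>

        static int example_register(void)
        {
                struct platform_device *pdev;
                int ret;

                pdev = platform_device_alloc("example-dev", 0);
                if (!pdev)
                        return -ENOMEM;

                ret = platform_device_add(pdev);
                if (ret) {
                        /* add failed (or was never called): only drop the reference */
                        platform_device_put(pdev);
                        return ret;
                }

                /*
                 * Only after platform_device_add() has succeeded are
                 * platform_device_del() or platform_device_unregister()
                 * the correct teardown calls.
                 */
                return 0;
        }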
Fixes: 26f6cb999366 ("[POWERPC] fsl_soc: add support for fsl_spi") Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221029111626.429971-1-yangyingliang@huawei.com --- arch/powerpc/platforms/83xx/mpc832x_rdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index e12cb44e717f1..caa96edf0e72a 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -107,7 +107,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, goto next; unreg: - platform_device_del(pdev); + platform_device_put(pdev); err: pr_err("%pOF: registration failed\n", np); next: -- GitLab From 03f7c1d2a49acd30e38789cd809d3300721e9b0e Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 30 Nov 2022 23:15:13 +0530 Subject: [PATCH 505/694] powerpc/hv-gpci: Fix hv_gpci event list Based on getPerfCountInfo v1.018 documentation, some of the hv_gpci events were deprecated for platform firmware that supports counter_info_version 0x8 or above. Fix the hv_gpci event list by adding a new attribute group called "hv_gpci_event_attrs_v6" and a "ENABLE_EVENTS_COUNTERINFO_V6" macro to enable these events for platform firmware that supports counter_info_version 0x6 or below. And assigning the hv_gpci event list based on output counter info version of underlying plaform. Fixes: 97bf2640184f ("powerpc/perf/hv-gpci: add the remaining gpci requests") Signed-off-by: Kajol Jain Reviewed-by: Madhavan Srinivasan Reviewed-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221130174513.87501-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci-requests.h | 4 ++++ arch/powerpc/perf/hv-gpci.c | 35 ++++++++++++++++++++++++++-- arch/powerpc/perf/hv-gpci.h | 1 + arch/powerpc/perf/req-gen/perf.h | 20 ++++++++++++++++ 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index 8965b4463d433..5e86371a20c78 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -79,6 +79,7 @@ REQUEST(__field(0, 8, partition_id) ) #include I(REQUEST_END) +#ifdef ENABLE_EVENTS_COUNTERINFO_V6 /* * Not available for counter_info_version >= 0x8, use * run_instruction_cycles_by_partition(0x100) instead. @@ -92,6 +93,7 @@ REQUEST(__field(0, 8, partition_id) __count(0x10, 8, cycles) ) #include I(REQUEST_END) +#endif #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 @@ -103,6 +105,7 @@ REQUEST(__field(0, 1, perf_collect_privileged) ) #include I(REQUEST_END) +#ifdef ENABLE_EVENTS_COUNTERINFO_V6 #define REQUEST_NAME processor_bus_utilization_abc_links #define REQUEST_NUM 0x50 #define REQUEST_IDX_KIND "hw_chip_id=?" 
@@ -194,6 +197,7 @@ REQUEST(__field(0, 4, phys_processor_idx) __count(0x28, 8, instructions_completed) ) #include I(REQUEST_END) +#endif /* Processor_core_power_mode (0x95) skipped, no counters */ /* Affinity_domain_information_by_virtual_processor (0xA0) skipped, diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 5eb60ed5b5e8a..7ff8ff3509f5f 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -70,9 +70,9 @@ static const struct attribute_group format_group = { .attrs = format_attrs, }; -static const struct attribute_group event_group = { +static struct attribute_group event_group = { .name = "events", - .attrs = hv_gpci_event_attrs, + /* .attrs is set in init */ }; #define HV_CAPS_ATTR(_name, _format) \ @@ -330,6 +330,7 @@ static int hv_gpci_init(void) int r; unsigned long hret; struct hv_perf_caps caps; + struct hv_gpci_request_buffer *arg; hv_gpci_assert_offsets_correct(); @@ -353,6 +354,36 @@ static int hv_gpci_init(void) /* sampling not supported */ h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * hcall H_GET_PERF_COUNTER_INFO populates the output + * counter_info_version value based on the system hypervisor. + * Pass the counter request 0x10 corresponds to request type + * 'Dispatch_timebase_by_processor', to get the supported + * counter_info_version. + */ + arg->params.counter_request = cpu_to_be32(0x10); + + r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + if (r) { + pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); + arg->params.counter_info_version_out = 0x8; + } + + /* + * Use counter_info_version_out value to assign + * required hv-gpci event list. + */ + if (arg->params.counter_info_version_out >= 0x8) + event_group.attrs = hv_gpci_event_attrs; + else + event_group.attrs = hv_gpci_event_attrs_v6; + + put_cpu_var(hv_gpci_reqb); + r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h index 4d108262bed79..c72020912dea5 100644 --- a/arch/powerpc/perf/hv-gpci.h +++ b/arch/powerpc/perf/hv-gpci.h @@ -26,6 +26,7 @@ enum { #define REQUEST_FILE "../hv-gpci-requests.h" #define NAME_LOWER hv_gpci #define NAME_UPPER HV_GPCI +#define ENABLE_EVENTS_COUNTERINFO_V6 #include "req-gen/perf.h" #undef REQUEST_FILE #undef NAME_LOWER diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h index fa9bc804e67af..6b2a59fefffa7 100644 --- a/arch/powerpc/perf/req-gen/perf.h +++ b/arch/powerpc/perf/req-gen/perf.h @@ -139,6 +139,26 @@ PMU_EVENT_ATTR_STRING( \ #define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ r_fields +/* Generate event list for platforms with counter_info_version 0x6 or below */ +static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = { +#include REQUEST_FILE + NULL +}; + +/* + * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci + * events were deprecated for platform firmware that supports + * counter_info_version 0x8 or above. + * Those deprecated events are still part of platform firmware that + * support counter_info_version 0x6 and below. As per the getPerfCountInfo + * v1.018 documentation there is no counter_info_version 0x7. 
+ * Undefining macro ENABLE_EVENTS_COUNTERINFO_V6, to disable the addition of + * deprecated events in "hv_gpci_event_attrs" attribute group, for platforms + * that supports counter_info_version 0x8 or above. + */ +#undef ENABLE_EVENTS_COUNTERINFO_V6 + +/* Generate event list for platforms with counter_info_version 0x8 or above*/ static __maybe_unused struct attribute *hv_gpci_event_attrs[] = { #include REQUEST_FILE NULL -- GitLab From 0e23347f1e0f2b1c98f87a4088231d0d6f59b962 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:13 +1100 Subject: [PATCH 506/694] powerpc/64: Add INTERRUPT_SANITIZE_REGISTERS Kconfig Add Kconfig option for enabling clearing of registers on arrival in an interrupt handler. This reduces the speculation influence of registers on kernel internals. The option will be consumed by 64-bit systems that feature speculation and wish to implement this mitigation. This patch only introduces the Kconfig option, no actual mitigations. The primary overhead of this mitigation lies in an increased number of registers that must be saved and restored by interrupt handlers on Book3S systems. Enable by default on Book3E systems, which prior to this patch eagerly save and restore register state, meaning that the mitigation when implemented will have minimal overhead. Acked-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-1-rmclure@linux.ibm.com --- arch/powerpc/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fe2aa445b654e..aec1431be06e3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -534,6 +534,15 @@ config HOTPLUG_CPU Say N if you are unsure. +config INTERRUPT_SANITIZE_REGISTERS + bool "Clear gprs on interrupt arrival" + depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER + default PPC_BOOK3E_64 + help + Reduce the influence of user register state on interrupt handlers and + syscalls through clearing user state from registers before handling + the exception. + config PPC_QUEUED_SPINLOCKS bool "Queued spinlocks" if EXPERT depends on SMP -- GitLab From cbf892ba56677b942020d2bc7ca9b79281fa0bcc Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:14 +1100 Subject: [PATCH 507/694] powerpc/64: Add interrupt register sanitisation macros Include in asm/ppc_asm.h macros to be used in multiple successive patches to implement zeroising architected registers in interrupt handlers. Registers will be sanitised in this fashion in future patches to reduce the speculation influence of user-controlled register values. These mitigations will be configurable through the CONFIG_INTERRUPT_SANITIZE_REGISTERS Kconfig option. Included are macros for conditionally zeroising registers and restoring as required with the mitigation enabled. With the mitigation disabled, non-volatiles must be restored on demand at separate locations to those required by the mitigation. 
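Loosely, the pairing these macros implement can be summarised in C-style
pseudocode as below; the real definitions are assembler macros (see the hunk
that follows), and zeroize_nvgprs()/rest_nvgprs() are only illustrative
stand-ins:

        #ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
        #define SANITIZE_NVGPRS()               zeroize_nvgprs() /* clear on entry */
        #define SANITIZE_RESTORE_NVGPRS()       rest_nvgprs()    /* always restore on exit */
        #define HANDLER_RESTORE_NVGPRS()                         /* handlers need not restore */
        #else
        #define SANITIZE_NVGPRS()                                /* no clearing */
        #define SANITIZE_RESTORE_NVGPRS()                        /* exit restores on demand */
        #define HANDLER_RESTORE_NVGPRS()        rest_nvgprs()    /* handlers restore as before */
        #endif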
Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-2-rmclure@linux.ibm.com --- arch/powerpc/include/asm/ppc_asm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 753a2757bcd4f..d2f44612f4b02 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -74,6 +74,25 @@ #define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) #define REST_GPR(n, base) REST_GPRS(n, n, base) +/* macros for handling user register sanitisation */ +#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS +#define SANITIZE_SYSCALL_GPRS() ZEROIZE_GPR(0); \ + ZEROIZE_GPRS(5, 12); \ + ZEROIZE_NVGPRS() +#define SANITIZE_GPR(n) ZEROIZE_GPR(n) +#define SANITIZE_GPRS(start, end) ZEROIZE_GPRS(start, end) +#define SANITIZE_NVGPRS() ZEROIZE_NVGPRS() +#define SANITIZE_RESTORE_NVGPRS() REST_NVGPRS(r1) +#define HANDLER_RESTORE_NVGPRS() +#else +#define SANITIZE_SYSCALL_GPRS() +#define SANITIZE_GPR(n) +#define SANITIZE_GPRS(start, end) +#define SANITIZE_NVGPRS() +#define SANITIZE_RESTORE_NVGPRS() +#define HANDLER_RESTORE_NVGPRS() REST_NVGPRS(r1) +#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */ + #define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base) #define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base) #define SAVE_4FPRS(n, base) SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base) -- GitLab From 75c5d6b1e194c341371639469fcb8691afa0e254 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:15 +1100 Subject: [PATCH 508/694] powerpc/64: Sanitise common exit code for interrupts Interrupt code is shared between Book3E/S 64-bit systems for interrupt handlers. Ensure that exit code correctly restores non-volatile gprs on each system when CONFIG_INTERRUPT_SANITIZE_REGISTERS is enabled. Also introduce macros for clearing/restoring registers on interrupt entry for when this configuration option is either disabled or enabled. Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-3-rmclure@linux.ibm.com --- arch/powerpc/kernel/interrupt_64.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 321992c1c9f90..dd04b0ba3959a 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -408,9 +408,11 @@ interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) addi r3,r1,STACK_INT_FRAME_REGS bl interrupt_exit_user_prepare +#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS cmpdi r3,0 bne- .Lrestore_nvgprs_\srr .Lrestore_nvgprs_\srr\()_cont: +#endif std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ #ifdef CONFIG_PPC_BOOK3S .Linterrupt_return_\srr\()_user_rst_start: @@ -424,6 +426,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS .Lfast_user_interrupt_return_\srr\(): + SANITIZE_RESTORE_NVGPRS() #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr lbz r4,PACASRR_VALID(r13) @@ -493,9 +496,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) b . 
/* prevent speculative execution */ .Linterrupt_return_\srr\()_user_rst_end: +#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS .Lrestore_nvgprs_\srr\(): REST_NVGPRS(r1) b .Lrestore_nvgprs_\srr\()_cont +#endif #ifdef CONFIG_PPC_BOOK3S interrupt_return_\srr\()_user_restart: @@ -585,6 +590,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS .Lfast_kernel_interrupt_return_\srr\(): + SANITIZE_RESTORE_NVGPRS() cmpdi cr1,r3,0 #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr -- GitLab From 2487fd2e6d61b5293eed8ecd25add3cc78593d38 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:16 +1100 Subject: [PATCH 509/694] powerpc/64s: IOption for MSR stored in r12 Interrupt handlers in asm/exceptions-64s.S contain a great deal of common code produced by the GEN_COMMON macros. Currently, at the exit point of the macro, r12 will contain the contents of the MSR. A future patch will cause these macros to zeroise architected registers to avoid potential speculation influence of user data. Provide an IOption that signals that r12 must be retained, as the interrupt handler assumes it to hold the contents of the MSR. Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-4-rmclure@linux.ibm.com --- arch/powerpc/kernel/exceptions-64s.S | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ac3b0580224ee..42b7c3212f296 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -111,6 +111,7 @@ name: #define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ #define __ISTACK(name) .L_ISTACK_ ## name #define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ +#define IMSR_R12 .L_IMSR_R12_\name\() /* Assumes MSR saved to r12 */ #define INT_DEFINE_BEGIN(n) \ .macro int_define_ ## n name @@ -176,6 +177,9 @@ do_define_int n .ifndef IKUAP IKUAP=1 .endif + .ifndef IMSR_R12 + IMSR_R12=0 + .endif .endm /* @@ -1751,6 +1755,7 @@ INT_DEFINE_BEGIN(fp_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(fp_unavailable) EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100) @@ -2384,6 +2389,7 @@ INT_DEFINE_BEGIN(altivec_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(altivec_unavailable) EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20) @@ -2433,6 +2439,7 @@ INT_DEFINE_BEGIN(vsx_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(vsx_unavailable) EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20) -- GitLab From 1df45d78b8a89da6544fab5267e8f5da15073d28 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:17 +1100 Subject: [PATCH 510/694] powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S Zeroise user state in gprs (assign to zero) to reduce the influence of user registers on speculation within kernel syscall handlers. Clears occur at the very beginning of the sc and scv 0 interrupt handlers, with restores occurring following the execution of the syscall handler. Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all other interrupt sources. The remaining gprs are overwritten by entry macros to interrupt handlers, irrespective of whether or not a given handler consumes these register values. If an interrupt does not select the IMSR_R12 IOption, zeroise r12. 
Prior to this commit, r14-r31 are restored on a per-interrupt basis at exit, but now they are always restored on 64bit Book3S. Remove explicit REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear user registers on interrupt, and continue to depend on the return value of interrupt_exit_user_prepare to determine whether or not to restore non-volatiles. The mmap_bench benchmark in selftests should rapidly invoke pagefaults. See ~0.8% performance regression with this mitigation, but this indicates the worst-case performance due to heavier-weight interrupt handlers. This mitigation is able to be enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com --- arch/powerpc/kernel/exceptions-64s.S | 27 ++++++++++++++++++--------- arch/powerpc/kernel/interrupt_64.S | 16 ++++++++++++++-- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 42b7c3212f296..429096b037d7a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -506,6 +506,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text) std r10,0(r1) /* make stack chain pointer */ std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ + SANITIZE_GPR(0) /* Mark our [H]SRRs valid for return */ li r10,1 @@ -548,8 +549,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r9,GPR11(r1) std r10,GPR12(r1) std r11,GPR13(r1) + .if !IMSR_R12 + SANITIZE_GPRS(9, 12) + .else + SANITIZE_GPRS(9, 11) + .endif SAVE_NVGPRS(r1) + SANITIZE_NVGPRS() .if IDAR .if IISIDE @@ -581,8 +588,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) - std r2,GPR2(r1) /* save r2 in stackframe */ - SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ + SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe */ + SANITIZE_GPRS(2, 8) mflr r9 /* Get LR, later save to stack */ LOAD_PACA_TOC() /* get kernel TOC into r2 */ std r9,_LINK(r1) @@ -700,6 +707,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mtlr r9 ld r9,_CCR(r1) mtcr r9 + SANITIZE_RESTORE_NVGPRS() REST_GPRS(2, 13, r1) REST_GPR(0, r1) /* restore original r1. */ @@ -1445,7 +1453,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. 
*/ - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() b interrupt_return_srr @@ -1671,7 +1679,7 @@ EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1737,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common) .Ldo_program_check: addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2169,7 +2177,7 @@ EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist addi r3,r1,STACK_INT_FRAME_REGS bl emulation_assist_interrupt - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_hsrr @@ -2501,7 +2509,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2529,7 +2537,8 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception - REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ + /* XXX Shouldn't be necessary in practice */ + HANDLER_RESTORE_NVGPRS() b interrupt_return_hsrr @@ -2755,7 +2764,7 @@ EXC_COMMON_BEGIN(altivec_assist_common) addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC bl altivec_assist_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ #else bl unknown_exception #endif diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index dd04b0ba3959a..fccc34489add8 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -96,6 +96,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * but this is the best we can do. */ + /* + * Zero user registers to prevent influencing speculative execution + * state of kernel code. + */ + SANITIZE_SYSCALL_GPRS() bl system_call_exception .Lsyscall_vectored_\name\()_exit: @@ -124,6 +129,7 @@ BEGIN_FTR_SECTION HMT_MEDIUM_LOW END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + SANITIZE_RESTORE_NVGPRS() cmpdi r3,0 bne .Lsyscall_vectored_\name\()_restore_regs @@ -159,7 +165,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r4,_LINK(r1) ld r5,_XER(r1) - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() REST_GPR(0, r1) mtcr r2 mtctr r3 @@ -275,6 +281,11 @@ END_BTB_FLUSH_SECTION wrteei 1 #endif + /* + * Zero user registers to prevent influencing speculative execution + * state of kernel code. + */ + SANITIZE_SYSCALL_GPRS() bl system_call_exception .Lsyscall_exit: @@ -315,6 +326,7 @@ BEGIN_FTR_SECTION stdcx. 
r0,0,r1 /* to clear the reservation */ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + SANITIZE_RESTORE_NVGPRS() cmpdi r3,0 bne .Lsyscall_restore_regs /* Zero volatile regs that may contain sensitive kernel data */ @@ -342,7 +354,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .Lsyscall_restore_regs: ld r3,_CTR(r1) ld r4,_XER(r1) - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() mtctr r3 mtspr SPRN_XER,r4 REST_GPR(0, r1) -- GitLab From efe1691ac814e4cf3653538b701662cbd905bddc Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:18 +1100 Subject: [PATCH 511/694] powerpc/64e: Clear gprs on interrupt routine entry on Book3E Zero GPRS r14-r31 on entry into the kernel for interrupt sources to limit influence of user-space values in potential speculation gadgets. Prior to this commit, all other GPRS are reassigned during the common prologue to interrupt handlers and so need not be zeroised explicitly. This may be done safely, without loss of register state prior to the interrupt, as the common prologue saves the initial values of non-volatiles, which are unconditionally restored in interrupt_64.S. Mitigation defaults to enabled by INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-6-rmclure@linux.ibm.com --- arch/powerpc/kernel/exceptions-64e.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index b9cec22df9f97..3f86091e68b3b 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -358,7 +358,6 @@ ret_from_mc_except: std r14,PACA_EXMC+EX_R14(r13); \ std r15,PACA_EXMC+EX_R15(r13) - /* Core exception code for all exceptions except TLB misses. */ #define EXCEPTION_COMMON_LVL(n, scratch, excf) \ exc_##n##_common: \ @@ -394,7 +393,8 @@ exc_##n##_common: \ std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ std r0,RESULT(r1); /* clear regs->result */ \ - SAVE_NVGPRS(r1); + SAVE_NVGPRS(r1); \ + SANITIZE_NVGPRS(); /* minimise speculation influence */ #define EXCEPTION_COMMON(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) -- GitLab From 7cd882df9485988f7d9b3fae04fde4e95a4c7a74 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Thu, 1 Dec 2022 18:10:19 +1100 Subject: [PATCH 512/694] powerpc/64: Sanitise user registers on interrupt in pseries, POWERNV Cause pseries and POWERNV platforms to default to zeroising all potentially user-defined registers when entering the kernel by means of any interrupt source, reducing user-influence of the kernel and the likelihood or producing speculation gadgets. 
Acked-by: Nicholas Piggin Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201071019.1953023-7-rmclure@linux.ibm.com --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index aec1431be06e3..e21d6de797d6e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -537,7 +537,7 @@ config HOTPLUG_CPU config INTERRUPT_SANITIZE_REGISTERS bool "Clear gprs on interrupt arrival" depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER - default PPC_BOOK3E_64 + default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV help Reduce the influence of user register state on interrupt handlers and syscalls through clearing user state from registers before handling -- GitLab From ad050d2390fccb22aa3e6f65e11757ce7a5a7ca5 Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Thu, 1 Dec 2022 11:14:42 -0500 Subject: [PATCH 513/694] powerpc/ftrace: fix syscall tracing on PPC64_ELF_ABI_V1 In v5.7 the powerpc syscall entry/exit logic was rewritten in C, on PPC64_ELF_ABI_V1 this resulted in the symbols in the syscall table changing from their dot prefixed variant to the non-prefixed ones. Since ftrace prefixes a dot to the syscall names when matching them to build its syscall event list, this resulted in no syscall events being available. Remove the PPC64_ELF_ABI_V1 specific version of arch_syscall_match_sym_name to have the same behavior across all powerpc variants. Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Michael Jeanson Reviewed-by: Mathieu Desnoyers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221201161442.2127231-1-mjeanson@efficios.com --- arch/powerpc/include/asm/ftrace.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ade406dc6504e..441c5f08258b5 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -71,17 +71,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, * those. */ #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME -#ifdef CONFIG_PPC64_ELF_ABI_V1 -static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) -{ - /* We need to skip past the initial dot, and the __se_sys alias */ - return !strcmp(sym + 1, name) || - (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) || - (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) || - (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) || - (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4)); -} -#else static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { return !strcmp(sym, name) || @@ -90,7 +79,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) || (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4)); } -#endif /* CONFIG_PPC64_ELF_ABI_V1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ #if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) -- GitLab From 84ecfe6f38ae4ee779ebd97ee173937fff565bf9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:39 +0100 Subject: [PATCH 514/694] powerpc/code-patching: Remove #ifdef CONFIG_STRICT_KERNEL_RWX No need to have one implementation of patch_instruction() for CONFIG_STRICT_KERNEL_RWX and one for !CONFIG_STRICT_KERNEL_RWX. 
In patch_instruction(), call raw_patch_instruction() when !CONFIG_STRICT_KERNEL_RWX. In poking_init(), bail out immediately, it will be equivalent to the weak default implementation. Everything else is declared static and will be discarded by GCC when !CONFIG_STRICT_KERNEL_RWX. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f67d2a109404d03e8fdf1ea15388c8778337a76b.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/code-patching.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 5b8f87db12176..a6a5047f8ba23 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -46,8 +46,6 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) return __patch_instruction(addr, instr, addr); } -#ifdef CONFIG_STRICT_KERNEL_RWX - struct patch_context { union { struct vm_struct *area; @@ -208,6 +206,9 @@ void __init poking_init(void) { int ret; + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + return; + if (mm_patch_enabled()) ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/text_poke_mm:online", @@ -358,7 +359,8 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!static_branch_likely(&poking_init_done)) + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || + !static_branch_likely(&poking_init_done)) return raw_patch_instruction(addr, instr); local_irq_save(flags); @@ -370,14 +372,6 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -#else /* !CONFIG_STRICT_KERNEL_RWX */ - -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) -{ - return raw_patch_instruction(addr, instr); -} - -#endif /* CONFIG_STRICT_KERNEL_RWX */ __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); -- GitLab From 6076dc349b1c587c74c37027efff76f0fa4646f4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:40 +0100 Subject: [PATCH 515/694] powerpc/feature-fixups: Refactor entry fixups patching Several fonctions have the same loop for patching instructions. Introduce function do_patch_entry_fixups() to refactor those loops. 
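With the helper in place each caller collapses to a single call; for example the
STF entry barrier site becomes roughly (argument names as in the hunks below):

        i = do_patch_entry_fixups(start, end, instrs,
                                  types & STF_BARRIER_FALLBACK,
                                  &stf_barrier_fallback);

The helper preserves the existing patching order (slot 0 is written first only
on the fallback path), so behaviour is unchanged.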
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/79eeff7b20a98f7136da5f79b1f7c436928f27f3.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/feature-fixups.c | 84 ++++++++++++------------------- 1 file changed, 32 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 31f40f544de54..93b3f8ea38aa7 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -118,9 +118,33 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BOOK3S_64 +static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, + bool do_fallback, void *fallback) +{ + int i; + + for (i = 0; start < end; start++, i++) { + unsigned int *dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + // See comment in do_entry_flush_fixups() RE order of patching + if (do_fallback) { + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK); + } else { + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + } + } + return i; +} + static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -144,23 +168,8 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - // See comment in do_entry_flush_fixups() RE order of patching - if (types & STF_BARRIER_FALLBACK) { - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } + i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK, + &stf_barrier_fallback); printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? 
"no" : @@ -325,7 +334,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) static int __do_entry_flush_fixups(void *data) { enum l1d_flush_type types = *(enum l1d_flush_type *)data; - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -375,42 +384,13 @@ static int __do_entry_flush_fixups(void *data) start = PTRRELOC(&__start___entry_flush_fixup); end = PTRRELOC(&__stop___entry_flush_fixup); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - if (types == L1D_FLUSH_FALLBACK) { - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } + i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &entry_flush_fallback); start = PTRRELOC(&__start___scv_entry_flush_fixup); end = PTRRELOC(&__stop___scv_entry_flush_fixup); - for (; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - if (types == L1D_FLUSH_FALLBACK) { - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } - + i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &scv_entry_flush_fallback); printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : -- GitLab From 3d1dbbca33a9c6dd3aafd4d14aaea9cc310723e1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:41 +0100 Subject: [PATCH 516/694] powerpc/feature-fixups: Refactor other fixups patching Several fonctions have the same loop for patching instructions. Introduce function do_patch_fixups() to refactor those loops. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/58ab36949c18f94d466fc98d6c085783b0cd474f.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/feature-fixups.c | 77 +++++++++++-------------------- 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 93b3f8ea38aa7..25168a59d1ce1 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -117,6 +117,24 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BARRIER_NOSPEC +static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num) +{ + int i; + + for (i = 0; start < end; start++, i++) { + int j; + unsigned int *dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + for (j = 0; j < num; j++) + patch_instruction(dest + j, ppc_inst(instrs[j])); + } + return i; +} +#endif + #ifdef CONFIG_PPC_BOOK3S_64 static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, bool do_fallback, void *fallback) @@ -181,7 +199,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[6], *dest; + unsigned int instrs[6]; long *start, *end; int i; @@ -215,18 +233,8 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest + 3, ppc_inst(instrs[3])); - patch_instruction(dest + 4, ppc_inst(instrs[4])); - patch_instruction(dest + 5, ppc_inst(instrs[5])); - } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : (types == STF_BARRIER_FALLBACK) ? "fallback" : @@ -283,7 +291,7 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) void do_uaccess_flush_fixups(enum l1d_flush_type types) { - unsigned int instrs[4], *dest; + unsigned int instrs[4]; long *start, *end; int i; @@ -309,17 +317,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest + 3, ppc_inst(instrs[3])); - } + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? 
"no" : @@ -418,7 +416,7 @@ void do_entry_flush_fixups(enum l1d_flush_type types) static int __do_rfi_flush_fixups(void *data) { enum l1d_flush_type types = *(enum l1d_flush_type *)data; - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -442,15 +440,7 @@ static int __do_rfi_flush_fixups(void *data) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - } + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : @@ -492,7 +482,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr, *dest; + unsigned int instr; long *start, *end; int i; @@ -506,12 +496,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, ppc_inst(instr)); - } + i = do_patch_fixups(start, end, &instr, 1); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } @@ -533,7 +518,7 @@ void do_barrier_nospec_fixups(bool enable) #ifdef CONFIG_PPC_E500 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr[2], *dest; + unsigned int instr[2]; long *start, *end; int i; @@ -549,13 +534,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ instr[1] = PPC_RAW_SYNC(); } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, ppc_inst(instr[0])); - patch_instruction(dest + 1, ppc_inst(instr[1])); - } + i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr)); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } -- GitLab From b988e7797d09379057cf991ae082f9ad7a309a63 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:42 +0100 Subject: [PATCH 517/694] powerpc/feature-fixups: Do not patch init section after init Once init section is freed, attempting to patch init code ends up in the weed. Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections") protected patch_instruction() against that, but it is the responsibility of the caller to ensure that the patched memory is valid. In the same spirit as jump_label with its jump_label_can_update() function, add is_fixup_addr_valid() function to skip patching on freed init section. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8e9311fc1b057e4e6a2a3a0701ebcc74b787affe.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/feature-fixups.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 25168a59d1ce1..80def1c2afcb6 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -118,6 +118,12 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BARRIER_NOSPEC +static bool is_fixup_addr_valid(void *dest, size_t size) +{ + return system_state < SYSTEM_FREEING_INITMEM || + !init_section_contains(dest, size); +} + static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num) { int i; @@ -126,6 +132,9 @@ static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num int j; unsigned int *dest = (void *)start + *start; + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num)) + continue; + pr_devel("patching dest %lx\n", (unsigned long)dest); for (j = 0; j < num; j++) @@ -144,6 +153,9 @@ static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, for (i = 0; start < end; start++, i++) { unsigned int *dest = (void *)start + *start; + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3)) + continue; + pr_devel("patching dest %lx\n", (unsigned long)dest); // See comment in do_entry_flush_fixups() RE order of patching -- GitLab From 6f3a81b60091031c2c14eb2373d1937b027deb46 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 2 Dec 2022 09:31:43 +0100 Subject: [PATCH 518/694] powerpc/code-patching: Remove protection against patching init addresses after init Once init section is freed, attempting to patch init code ends up in the weed. Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections") protected patch_instruction() against that, but it is the responsibility of the caller to ensure that the patched memory is valid. All callers have now been verified and fixed so the check can be removed. This improves ftrace activation by about 2% on 8xx. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/504310828f473d424e2ed229eff57bf075f52796.1669969781.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/code-patching.h | 2 -- arch/powerpc/lib/code-patching.c | 13 +------------ arch/powerpc/mm/mem.c | 1 - 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 1c6316ec4b74f..3f881548fb61c 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -22,8 +22,6 @@ #define BRANCH_SET_LINK 0x1 #define BRANCH_ABSOLUTE 0x2 -DECLARE_STATIC_KEY_FALSE(init_mem_is_free); - /* * Powerpc branch instruction is : * diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index a6a5047f8ba23..73ce4b90bb1b8 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -349,7 +349,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) +int patch_instruction(u32 *addr, ppc_inst_t instr) { int err; unsigned long flags; @@ -372,17 +372,6 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } - -__ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); - -int patch_instruction(u32 *addr, ppc_inst_t instr) -{ - /* Make sure we aren't patching a freed init section */ - if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4)) - return 0; - - return do_patch_instruction(addr, instr); -} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(u32 *addr, unsigned long target, int flags) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 84d171953ba44..8b121df7b08f8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -344,7 +344,6 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); - static_branch_enable(&init_mem_is_free); free_initmem_default(POISON_FREE_INITMEM); ftrace_free_init_tramp(); } -- GitLab From 25483dedd2f5d9bc6928cd790ee59772fb880a79 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 2 Dec 2022 17:11:44 +0530 Subject: [PATCH 519/694] dmaengine: Revert "dmaengine: remove s3c24xx driver" This reverts cccc46ae3623 ("dmaengine: remove s3c24xx driver") as it causes regression due to missing header Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 12 + drivers/dma/Makefile | 1 + drivers/dma/s3c24xx-dma.c | 1428 +++++++++++++++++++++ include/linux/platform_data/dma-s3c24xx.h | 48 + 4 files changed, 1489 insertions(+) create mode 100644 drivers/dma/s3c24xx-dma.c create mode 100644 include/linux/platform_data/dma-s3c24xx.h diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index b9d54f20812f7..80848c6b5cd55 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -610,6 +610,18 @@ config SPRD_DMA help Enable support for the on-chip DMA controller on Spreadtrum platform. +config S3C24XX_DMAC + bool "Samsung S3C24XX DMA support" + depends on ARCH_S3C24XX || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the Samsung S3C24XX DMA controller driver. The + DMA controller is having multiple DMA channels which can be + configured for different peripherals like audio, UART, SPI. + The DMA controller can transfer data from memory to peripheral, + periphal to memory, periphal to periphal and memory to memory. 
+ config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" depends on MACH_TX49XX diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index a4fd1ce295102..5b55ada052a7f 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_STM32_DMA) += stm32-dma.o obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o obj-$(CONFIG_SPRD_DMA) += sprd-dma.o +obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o obj-$(CONFIG_TEGRA186_GPC_DMA) += tegra186-gpc-dma.o obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c new file mode 100644 index 0000000000000..a09eeb545f7d5 --- /dev/null +++ b/drivers/dma/s3c24xx-dma.c @@ -0,0 +1,1428 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * S3C24XX DMA handling + * + * Copyright (c) 2013 Heiko Stuebner + * + * based on amba-pl08x.c + * + * Copyright (c) 2006 ARM Ltd. + * Copyright (c) 2010 ST-Ericsson SA + * + * Author: Peter Pearse + * Author: Linus Walleij + * + * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals + * that can be routed to any of the 4 to 8 hardware-channels. + * + * Therefore on these DMA controllers the number of channels + * and the number of incoming DMA signals are two totally different things. + * It is usually not possible to theoretically handle all physical signals, + * so a multiplexing scheme with possible denial of use is necessary. + * + * Open items: + * - bursts + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dmaengine.h" +#include "virt-dma.h" + +#define MAX_DMA_CHANNELS 8 + +#define S3C24XX_DISRC 0x00 +#define S3C24XX_DISRCC 0x04 +#define S3C24XX_DISRCC_INC_INCREMENT 0 +#define S3C24XX_DISRCC_INC_FIXED BIT(0) +#define S3C24XX_DISRCC_LOC_AHB 0 +#define S3C24XX_DISRCC_LOC_APB BIT(1) + +#define S3C24XX_DIDST 0x08 +#define S3C24XX_DIDSTC 0x0c +#define S3C24XX_DIDSTC_INC_INCREMENT 0 +#define S3C24XX_DIDSTC_INC_FIXED BIT(0) +#define S3C24XX_DIDSTC_LOC_AHB 0 +#define S3C24XX_DIDSTC_LOC_APB BIT(1) +#define S3C24XX_DIDSTC_INT_TC0 0 +#define S3C24XX_DIDSTC_INT_RELOAD BIT(2) + +#define S3C24XX_DCON 0x10 + +#define S3C24XX_DCON_TC_MASK 0xfffff +#define S3C24XX_DCON_DSZ_BYTE (0 << 20) +#define S3C24XX_DCON_DSZ_HALFWORD (1 << 20) +#define S3C24XX_DCON_DSZ_WORD (2 << 20) +#define S3C24XX_DCON_DSZ_MASK (3 << 20) +#define S3C24XX_DCON_DSZ_SHIFT 20 +#define S3C24XX_DCON_AUTORELOAD 0 +#define S3C24XX_DCON_NORELOAD BIT(22) +#define S3C24XX_DCON_HWTRIG BIT(23) +#define S3C24XX_DCON_HWSRC_SHIFT 24 +#define S3C24XX_DCON_SERV_SINGLE 0 +#define S3C24XX_DCON_SERV_WHOLE BIT(27) +#define S3C24XX_DCON_TSZ_UNIT 0 +#define S3C24XX_DCON_TSZ_BURST4 BIT(28) +#define S3C24XX_DCON_INT BIT(29) +#define S3C24XX_DCON_SYNC_PCLK 0 +#define S3C24XX_DCON_SYNC_HCLK BIT(30) +#define S3C24XX_DCON_DEMAND 0 +#define S3C24XX_DCON_HANDSHAKE BIT(31) + +#define S3C24XX_DSTAT 0x14 +#define S3C24XX_DSTAT_STAT_BUSY BIT(20) +#define S3C24XX_DSTAT_CURRTC_MASK 0xfffff + +#define S3C24XX_DMASKTRIG 0x20 +#define S3C24XX_DMASKTRIG_SWTRIG BIT(0) +#define S3C24XX_DMASKTRIG_ON BIT(1) +#define S3C24XX_DMASKTRIG_STOP BIT(2) + +#define S3C24XX_DMAREQSEL 0x24 +#define S3C24XX_DMAREQSEL_HW BIT(0) + +/* + * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel + * for a DMA source. Instead only specific channels are valid. + * All of these SoCs have 4 physical channels and the number of request + * source bits is 3. 
Additionally we also need 1 bit to mark the channel + * as valid. + * Therefore we separate the chansel element of the channel data into 4 + * parts of 4 bits each, to hold the information if the channel is valid + * and the hw request source to use. + * + * Example: + * SDI is valid on channels 0, 2 and 3 - with varying hw request sources. + * For it the chansel field would look like + * + * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1 + * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2 + * ((BIT(3) | 2) << 0 * 4) // channel 0, with request source 2 + */ +#define S3C24XX_CHANSEL_WIDTH 4 +#define S3C24XX_CHANSEL_VALID BIT(3) +#define S3C24XX_CHANSEL_REQ_MASK 7 + +/* + * struct soc_data - vendor-specific config parameters for individual SoCs + * @stride: spacing between the registers of each channel + * @has_reqsel: does the controller use the newer requestselection mechanism + * @has_clocks: are controllable dma-clocks present + */ +struct soc_data { + int stride; + bool has_reqsel; + bool has_clocks; +}; + +/* + * enum s3c24xx_dma_chan_state - holds the virtual channel states + * @S3C24XX_DMA_CHAN_IDLE: the channel is idle + * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport + * channel and is running a transfer on it + * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport + * channel to become available (only pertains to memcpy channels) + */ +enum s3c24xx_dma_chan_state { + S3C24XX_DMA_CHAN_IDLE, + S3C24XX_DMA_CHAN_RUNNING, + S3C24XX_DMA_CHAN_WAITING, +}; + +/* + * struct s3c24xx_sg - structure containing data per sg + * @src_addr: src address of sg + * @dst_addr: dst address of sg + * @len: transfer len in bytes + * @node: node for txd's dsg_list + */ +struct s3c24xx_sg { + dma_addr_t src_addr; + dma_addr_t dst_addr; + size_t len; + struct list_head node; +}; + +/* + * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor + * @vd: virtual DMA descriptor + * @dsg_list: list of children sg's + * @at: sg currently being transfered + * @width: transfer width + * @disrcc: value for source control register + * @didstc: value for destination control register + * @dcon: base value for dcon register + * @cyclic: indicate cyclic transfer + */ +struct s3c24xx_txd { + struct virt_dma_desc vd; + struct list_head dsg_list; + struct list_head *at; + u8 width; + u32 disrcc; + u32 didstc; + u32 dcon; + bool cyclic; +}; + +struct s3c24xx_dma_chan; + +/* + * struct s3c24xx_dma_phy - holder for the physical channels + * @id: physical index to this channel + * @valid: does the channel have all required elements + * @base: virtual memory base (remapped) for the this channel + * @irq: interrupt for this channel + * @clk: clock for this channel + * @lock: a lock to use when altering an instance of this struct + * @serving: virtual channel currently being served by this physicalchannel + * @host: a pointer to the host (internal use) + */ +struct s3c24xx_dma_phy { + unsigned int id; + bool valid; + void __iomem *base; + int irq; + struct clk *clk; + spinlock_t lock; + struct s3c24xx_dma_chan *serving; + struct s3c24xx_dma_engine *host; +}; + +/* + * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel + * @id: the id of the channel + * @name: name of the channel + * @vc: wrapped virtual channel + * @phy: the physical channel utilized by this channel, if there is one + * @runtime_addr: address for RX/TX according to the runtime config + * @at: active transaction on this channel + * @lock: a lock for this 
channel data + * @host: a pointer to the host (internal use) + * @state: whether the channel is idle, running etc + * @slave: whether this channel is a device (slave) or for memcpy + */ +struct s3c24xx_dma_chan { + int id; + const char *name; + struct virt_dma_chan vc; + struct s3c24xx_dma_phy *phy; + struct dma_slave_config cfg; + struct s3c24xx_txd *at; + struct s3c24xx_dma_engine *host; + enum s3c24xx_dma_chan_state state; + bool slave; +}; + +/* + * struct s3c24xx_dma_engine - the local state holder for the S3C24XX + * @pdev: the corresponding platform device + * @pdata: platform data passed in from the platform/machine + * @base: virtual memory base (remapped) + * @slave: slave engine for this instance + * @memcpy: memcpy engine for this instance + * @phy_chans: array of data for the physical channels + */ +struct s3c24xx_dma_engine { + struct platform_device *pdev; + const struct s3c24xx_dma_platdata *pdata; + struct soc_data *sdata; + void __iomem *base; + struct dma_device slave; + struct dma_device memcpy; + struct s3c24xx_dma_phy *phy_chans; +}; + +/* + * Physical channel handling + */ + +/* + * Check whether a certain channel is busy or not. + */ +static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy) +{ + unsigned int val = readl(phy->base + S3C24XX_DSTAT); + return val & S3C24XX_DSTAT_STAT_BUSY; +} + +static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan, + struct s3c24xx_dma_phy *phy) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; + struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; + int phyvalid; + + /* every phy is valid for memcopy channels */ + if (!s3cchan->slave) + return true; + + /* On newer variants all phys can be used for all virtual channels */ + if (s3cdma->sdata->has_reqsel) + return true; + + phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH)); + return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false; +} + +/* + * Allocate a physical channel for a virtual channel + * + * Try to locate a physical channel to be used for this transfer. If all + * are taken return NULL and the requester will have to cope by using + * some fallback PIO mode or retrying later. + */ +static +struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_dma_phy *phy = NULL; + unsigned long flags; + int i; + int ret; + + for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) { + phy = &s3cdma->phy_chans[i]; + + if (!phy->valid) + continue; + + if (!s3c24xx_dma_phy_valid(s3cchan, phy)) + continue; + + spin_lock_irqsave(&phy->lock, flags); + + if (!phy->serving) { + phy->serving = s3cchan; + spin_unlock_irqrestore(&phy->lock, flags); + break; + } + + spin_unlock_irqrestore(&phy->lock, flags); + } + + /* No physical channel available, cope with it */ + if (i == s3cdma->pdata->num_phy_channels) { + dev_warn(&s3cdma->pdev->dev, "no phy channel available\n"); + return NULL; + } + + /* start the phy clock */ + if (s3cdma->sdata->has_clocks) { + ret = clk_enable(phy->clk); + if (ret) { + dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n", + phy->id, ret); + phy->serving = NULL; + return NULL; + } + } + + return phy; +} + +/* + * Mark the physical channel as free. + * + * This drops the link between the physical and virtual channel. 
+ */ +static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy) +{ + struct s3c24xx_dma_engine *s3cdma = phy->host; + + if (s3cdma->sdata->has_clocks) + clk_disable(phy->clk); + + phy->serving = NULL; +} + +/* + * Stops the channel by writing the stop bit. + * This should not be used for an on-going transfer, but as a method of + * shutting down a channel (eg, when it's no longer used) or terminating a + * transfer. + */ +static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy) +{ + writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG); +} + +/* + * Virtual channel handling + */ + +static inline +struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct s3c24xx_dma_chan, vc.chan); +} + +static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_phy *phy = s3cchan->phy; + struct s3c24xx_txd *txd = s3cchan->at; + u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK; + + return tc * txd->width; +} + +static int s3c24xx_dma_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + unsigned long flags; + int ret = 0; + + /* Reject definitely invalid configurations */ + if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES || + config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) + return -EINVAL; + + spin_lock_irqsave(&s3cchan->vc.lock, flags); + + if (!s3cchan->slave) { + ret = -EINVAL; + goto out; + } + + s3cchan->cfg = *config; + +out: + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + return ret; +} + +/* + * Transfer handling + */ + +static inline +struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct s3c24xx_txd, vd.tx); +} + +static struct s3c24xx_txd *s3c24xx_dma_get_txd(void) +{ + struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + + if (txd) { + INIT_LIST_HEAD(&txd->dsg_list); + txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD; + } + + return txd; +} + +static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd) +{ + struct s3c24xx_sg *dsg, *_dsg; + + list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) { + list_del(&dsg->node); + kfree(dsg); + } + + kfree(txd); +} + +static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan, + struct s3c24xx_txd *txd) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_dma_phy *phy = s3cchan->phy; + const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; + struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node); + u32 dcon = txd->dcon; + u32 val; + + /* transfer-size and -count from len and width */ + switch (txd->width) { + case 1: + dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len; + break; + case 2: + dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2); + break; + case 4: + dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4); + break; + } + + if (s3cchan->slave) { + struct s3c24xx_dma_channel *cdata = + &pdata->channels[s3cchan->id]; + + if (s3cdma->sdata->has_reqsel) { + writel_relaxed((cdata->chansel << 1) | + S3C24XX_DMAREQSEL_HW, + phy->base + S3C24XX_DMAREQSEL); + } else { + int csel = cdata->chansel >> (phy->id * + S3C24XX_CHANSEL_WIDTH); + + csel &= S3C24XX_CHANSEL_REQ_MASK; + dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT; + dcon |= S3C24XX_DCON_HWTRIG; + } + } else { + if (s3cdma->sdata->has_reqsel) + writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL); + } + + writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC); + 
writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC); + writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST); + writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC); + writel_relaxed(dcon, phy->base + S3C24XX_DCON); + + val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG); + val &= ~S3C24XX_DMASKTRIG_STOP; + val |= S3C24XX_DMASKTRIG_ON; + + /* trigger the dma operation for memcpy transfers */ + if (!s3cchan->slave) + val |= S3C24XX_DMASKTRIG_SWTRIG; + + writel(val, phy->base + S3C24XX_DMASKTRIG); +} + +/* + * Set the initial DMA register values and start first sg. + */ +static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_phy *phy = s3cchan->phy; + struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc); + struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx); + + list_del(&txd->vd.node); + + s3cchan->at = txd; + + /* Wait for channel inactive */ + while (s3c24xx_dma_phy_busy(phy)) + cpu_relax(); + + /* point to the first element of the sg list */ + txd->at = txd->dsg_list.next; + s3c24xx_dma_start_next_sg(s3cchan, txd); +} + +/* + * Try to allocate a physical channel. When successful, assign it to + * this virtual channel, and initiate the next descriptor. The + * virtual channel lock must be held at this point. + */ +static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_dma_phy *phy; + + phy = s3c24xx_dma_get_phy(s3cchan); + if (!phy) { + dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n", + s3cchan->name); + s3cchan->state = S3C24XX_DMA_CHAN_WAITING; + return; + } + + dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n", + phy->id, s3cchan->name); + + s3cchan->phy = phy; + s3cchan->state = S3C24XX_DMA_CHAN_RUNNING; + + s3c24xx_dma_start_next_txd(s3cchan); +} + +static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy, + struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + + dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n", + phy->id, s3cchan->name); + + /* + * We do this without taking the lock; we're really only concerned + * about whether this pointer is NULL or not, and we're guaranteed + * that this will only be called when it _already_ is non-NULL. + */ + phy->serving = s3cchan; + s3cchan->phy = phy; + s3cchan->state = S3C24XX_DMA_CHAN_RUNNING; + s3c24xx_dma_start_next_txd(s3cchan); +} + +/* + * Free a physical DMA channel, potentially reallocating it to another + * virtual channel if we have any pending. + */ +static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan) +{ + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_dma_chan *p, *next; + +retry: + next = NULL; + + /* Find a waiting virtual channel for the next transfer. */ + list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node) + if (p->state == S3C24XX_DMA_CHAN_WAITING) { + next = p; + break; + } + + if (!next) { + list_for_each_entry(p, &s3cdma->slave.channels, + vc.chan.device_node) + if (p->state == S3C24XX_DMA_CHAN_WAITING && + s3c24xx_dma_phy_valid(p, s3cchan->phy)) { + next = p; + break; + } + } + + /* Ensure that the physical channel is stopped */ + s3c24xx_dma_terminate_phy(s3cchan->phy); + + if (next) { + bool success; + + /* + * Eww. We know this isn't going to deadlock + * but lockdep probably doesn't. 
+ */ + spin_lock(&next->vc.lock); + /* Re-check the state now that we have the lock */ + success = next->state == S3C24XX_DMA_CHAN_WAITING; + if (success) + s3c24xx_dma_phy_reassign_start(s3cchan->phy, next); + spin_unlock(&next->vc.lock); + + /* If the state changed, try to find another channel */ + if (!success) + goto retry; + } else { + /* No more jobs, so free up the physical channel */ + s3c24xx_dma_put_phy(s3cchan->phy); + } + + s3cchan->phy = NULL; + s3cchan->state = S3C24XX_DMA_CHAN_IDLE; +} + +static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd) +{ + struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx); + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan); + + if (!s3cchan->slave) + dma_descriptor_unmap(&vd->tx); + + s3c24xx_dma_free_txd(txd); +} + +static irqreturn_t s3c24xx_dma_irq(int irq, void *data) +{ + struct s3c24xx_dma_phy *phy = data; + struct s3c24xx_dma_chan *s3cchan = phy->serving; + struct s3c24xx_txd *txd; + + dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id); + + /* + * Interrupts happen to notify the completion of a transfer and the + * channel should have moved into its stop state already on its own. + * Therefore interrupts on channels not bound to a virtual channel + * should never happen. Nevertheless send a terminate command to the + * channel if the unlikely case happens. + */ + if (unlikely(!s3cchan)) { + dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n", + phy->id); + + s3c24xx_dma_terminate_phy(phy); + + return IRQ_HANDLED; + } + + spin_lock(&s3cchan->vc.lock); + txd = s3cchan->at; + if (txd) { + /* when more sg's are in this txd, start the next one */ + if (!list_is_last(txd->at, &txd->dsg_list)) { + txd->at = txd->at->next; + if (txd->cyclic) + vchan_cyclic_callback(&txd->vd); + s3c24xx_dma_start_next_sg(s3cchan, txd); + } else if (!txd->cyclic) { + s3cchan->at = NULL; + vchan_cookie_complete(&txd->vd); + + /* + * And start the next descriptor (if any), + * otherwise free this channel. 
+ */ + if (vchan_next_desc(&s3cchan->vc)) + s3c24xx_dma_start_next_txd(s3cchan); + else + s3c24xx_dma_phy_free(s3cchan); + } else { + vchan_cyclic_callback(&txd->vd); + + /* Cyclic: reset at beginning */ + txd->at = txd->dsg_list.next; + s3c24xx_dma_start_next_sg(s3cchan, txd); + } + } + spin_unlock(&s3cchan->vc.lock); + + return IRQ_HANDLED; +} + +/* + * The DMA ENGINE API + */ + +static int s3c24xx_dma_terminate_all(struct dma_chan *chan) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + LIST_HEAD(head); + unsigned long flags; + int ret; + + spin_lock_irqsave(&s3cchan->vc.lock, flags); + + if (!s3cchan->phy && !s3cchan->at) { + dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n", + s3cchan->id); + ret = -EINVAL; + goto unlock; + } + + s3cchan->state = S3C24XX_DMA_CHAN_IDLE; + + /* Mark physical channel as free */ + if (s3cchan->phy) + s3c24xx_dma_phy_free(s3cchan); + + /* Dequeue current job */ + if (s3cchan->at) { + vchan_terminate_vdesc(&s3cchan->at->vd); + s3cchan->at = NULL; + } + + /* Dequeue jobs not yet fired as well */ + + vchan_get_all_descriptors(&s3cchan->vc, &head); + + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + + vchan_dma_desc_free_list(&s3cchan->vc, &head); + + return 0; + +unlock: + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + + return ret; +} + +static void s3c24xx_dma_synchronize(struct dma_chan *chan) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + + vchan_synchronize(&s3cchan->vc); +} + +static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan) +{ + /* Ensure all queued descriptors are freed */ + vchan_free_chan_resources(to_virt_chan(chan)); +} + +static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_txd *txd; + struct s3c24xx_sg *dsg; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + spin_lock_irqsave(&s3cchan->vc.lock, flags); + ret = dma_cookie_status(chan, cookie, txstate); + + /* + * There's no point calculating the residue if there's + * no txstate to store the value. + */ + if (ret == DMA_COMPLETE || !txstate) { + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + return ret; + } + + vd = vchan_find_desc(&s3cchan->vc, cookie); + if (vd) { + /* On the issued list, so hasn't been processed yet */ + txd = to_s3c24xx_txd(&vd->tx); + + list_for_each_entry(dsg, &txd->dsg_list, node) + bytes += dsg->len; + } else { + /* + * Currently running, so sum over the pending sg's and + * the currently active one. 
+ */ + txd = s3cchan->at; + + dsg = list_entry(txd->at, struct s3c24xx_sg, node); + list_for_each_entry_from(dsg, &txd->dsg_list, node) + bytes += dsg->len; + + bytes += s3c24xx_dma_getbytes_chan(s3cchan); + } + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); + + /* + * This cookie not complete yet + * Get number of bytes left in the active transactions and queue + */ + dma_set_residue(txstate, bytes); + + /* Whether waiting or running, we're in progress */ + return ret; +} + +/* + * Initialize a descriptor to be used by memcpy submit + */ +static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + struct s3c24xx_txd *txd; + struct s3c24xx_sg *dsg; + int src_mod, dest_mod; + + dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %zu bytes from %s\n", + len, s3cchan->name); + + if ((len & S3C24XX_DCON_TC_MASK) != len) { + dev_err(&s3cdma->pdev->dev, "memcpy size %zu to large\n", len); + return NULL; + } + + txd = s3c24xx_dma_get_txd(); + if (!txd) + return NULL; + + dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); + if (!dsg) { + s3c24xx_dma_free_txd(txd); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->src_addr = src; + dsg->dst_addr = dest; + dsg->len = len; + + /* + * Determine a suitable transfer width. + * The DMA controller cannot fetch/store information which is not + * naturally aligned on the bus, i.e., a 4 byte fetch must start at + * an address divisible by 4 - more generally addr % width must be 0. + */ + src_mod = src % 4; + dest_mod = dest % 4; + switch (len % 4) { + case 0: + txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1; + break; + case 2: + txd->width = ((src_mod == 2 || src_mod == 0) && + (dest_mod == 2 || dest_mod == 0)) ? 
2 : 1; + break; + default: + txd->width = 1; + break; + } + + txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT; + txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT; + txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK | + S3C24XX_DCON_SERV_WHOLE; + + return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); +} + +static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, + enum dma_transfer_direction direction, unsigned long flags) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; + struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; + struct s3c24xx_txd *txd; + struct s3c24xx_sg *dsg; + unsigned sg_len; + dma_addr_t slave_addr; + u32 hwcfg = 0; + int i; + + dev_dbg(&s3cdma->pdev->dev, + "prepare cyclic transaction of %zu bytes with period %zu from %s\n", + size, period, s3cchan->name); + + if (!is_slave_direction(direction)) { + dev_err(&s3cdma->pdev->dev, + "direction %d unsupported\n", direction); + return NULL; + } + + txd = s3c24xx_dma_get_txd(); + if (!txd) + return NULL; + + txd->cyclic = 1; + + if (cdata->handshake) + txd->dcon |= S3C24XX_DCON_HANDSHAKE; + + switch (cdata->bus) { + case S3C24XX_DMA_APB: + txd->dcon |= S3C24XX_DCON_SYNC_PCLK; + hwcfg |= S3C24XX_DISRCC_LOC_APB; + break; + case S3C24XX_DMA_AHB: + txd->dcon |= S3C24XX_DCON_SYNC_HCLK; + hwcfg |= S3C24XX_DISRCC_LOC_AHB; + break; + } + + /* + * Always assume our peripheral desintation is a fixed + * address in memory. + */ + hwcfg |= S3C24XX_DISRCC_INC_FIXED; + + /* + * Individual dma operations are requested by the slave, + * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE). 
+ */ + txd->dcon |= S3C24XX_DCON_SERV_SINGLE; + + if (direction == DMA_MEM_TO_DEV) { + txd->disrcc = S3C24XX_DISRCC_LOC_AHB | + S3C24XX_DISRCC_INC_INCREMENT; + txd->didstc = hwcfg; + slave_addr = s3cchan->cfg.dst_addr; + txd->width = s3cchan->cfg.dst_addr_width; + } else { + txd->disrcc = hwcfg; + txd->didstc = S3C24XX_DIDSTC_LOC_AHB | + S3C24XX_DIDSTC_INC_INCREMENT; + slave_addr = s3cchan->cfg.src_addr; + txd->width = s3cchan->cfg.src_addr_width; + } + + sg_len = size / period; + + for (i = 0; i < sg_len; i++) { + dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); + if (!dsg) { + s3c24xx_dma_free_txd(txd); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->len = period; + /* Check last period length */ + if (i == sg_len - 1) + dsg->len = size - period * i; + if (direction == DMA_MEM_TO_DEV) { + dsg->src_addr = addr + period * i; + dsg->dst_addr = slave_addr; + } else { /* DMA_DEV_TO_MEM */ + dsg->src_addr = slave_addr; + dsg->dst_addr = addr + period * i; + } + } + + return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); +} + +static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + struct s3c24xx_dma_engine *s3cdma = s3cchan->host; + const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata; + struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id]; + struct s3c24xx_txd *txd; + struct s3c24xx_sg *dsg; + struct scatterlist *sg; + dma_addr_t slave_addr; + u32 hwcfg = 0; + int tmp; + + dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n", + sg_dma_len(sgl), s3cchan->name); + + txd = s3c24xx_dma_get_txd(); + if (!txd) + return NULL; + + if (cdata->handshake) + txd->dcon |= S3C24XX_DCON_HANDSHAKE; + + switch (cdata->bus) { + case S3C24XX_DMA_APB: + txd->dcon |= S3C24XX_DCON_SYNC_PCLK; + hwcfg |= S3C24XX_DISRCC_LOC_APB; + break; + case S3C24XX_DMA_AHB: + txd->dcon |= S3C24XX_DCON_SYNC_HCLK; + hwcfg |= S3C24XX_DISRCC_LOC_AHB; + break; + } + + /* + * Always assume our peripheral desintation is a fixed + * address in memory. + */ + hwcfg |= S3C24XX_DISRCC_INC_FIXED; + + /* + * Individual dma operations are requested by the slave, + * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE). 
+ */ + txd->dcon |= S3C24XX_DCON_SERV_SINGLE; + + if (direction == DMA_MEM_TO_DEV) { + txd->disrcc = S3C24XX_DISRCC_LOC_AHB | + S3C24XX_DISRCC_INC_INCREMENT; + txd->didstc = hwcfg; + slave_addr = s3cchan->cfg.dst_addr; + txd->width = s3cchan->cfg.dst_addr_width; + } else if (direction == DMA_DEV_TO_MEM) { + txd->disrcc = hwcfg; + txd->didstc = S3C24XX_DIDSTC_LOC_AHB | + S3C24XX_DIDSTC_INC_INCREMENT; + slave_addr = s3cchan->cfg.src_addr; + txd->width = s3cchan->cfg.src_addr_width; + } else { + s3c24xx_dma_free_txd(txd); + dev_err(&s3cdma->pdev->dev, + "direction %d unsupported\n", direction); + return NULL; + } + + for_each_sg(sgl, sg, sg_len, tmp) { + dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT); + if (!dsg) { + s3c24xx_dma_free_txd(txd); + return NULL; + } + list_add_tail(&dsg->node, &txd->dsg_list); + + dsg->len = sg_dma_len(sg); + if (direction == DMA_MEM_TO_DEV) { + dsg->src_addr = sg_dma_address(sg); + dsg->dst_addr = slave_addr; + } else { /* DMA_DEV_TO_MEM */ + dsg->src_addr = slave_addr; + dsg->dst_addr = sg_dma_address(sg); + } + } + + return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags); +} + +/* + * Slave transactions callback to the slave device to allow + * synchronization of slave DMA signals with the DMAC enable + */ +static void s3c24xx_dma_issue_pending(struct dma_chan *chan) +{ + struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&s3cchan->vc.lock, flags); + if (vchan_issue_pending(&s3cchan->vc)) { + if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING) + s3c24xx_dma_phy_alloc_and_start(s3cchan); + } + spin_unlock_irqrestore(&s3cchan->vc.lock, flags); +} + +/* + * Bringup and teardown + */ + +/* + * Initialise the DMAC memcpy/slave channels. + * Make a local wrapper to hold required data + */ +static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma, + struct dma_device *dmadev, unsigned int channels, bool slave) +{ + struct s3c24xx_dma_chan *chan; + int i; + + INIT_LIST_HEAD(&dmadev->channels); + + /* + * Register as many memcpy as we have physical channels, + * we won't always be able to use all but the code will have + * to cope with that situation. + */ + for (i = 0; i < channels; i++) { + chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL); + if (!chan) + return -ENOMEM; + + chan->id = i; + chan->host = s3cdma; + chan->state = S3C24XX_DMA_CHAN_IDLE; + + if (slave) { + chan->slave = true; + chan->name = kasprintf(GFP_KERNEL, "slave%d", i); + if (!chan->name) + return -ENOMEM; + } else { + chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); + if (!chan->name) + return -ENOMEM; + } + dev_dbg(dmadev->dev, + "initialize virtual channel \"%s\"\n", + chan->name); + + chan->vc.desc_free = s3c24xx_dma_desc_free; + vchan_init(&chan->vc, dmadev); + } + dev_info(dmadev->dev, "initialized %d virtual %s channels\n", + i, slave ? 
"slave" : "memcpy"); + return i; +} + +static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev) +{ + struct s3c24xx_dma_chan *chan = NULL; + struct s3c24xx_dma_chan *next; + + list_for_each_entry_safe(chan, + next, &dmadev->channels, vc.chan.device_node) { + list_del(&chan->vc.chan.device_node); + tasklet_kill(&chan->vc.task); + } +} + +/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */ +static struct soc_data soc_s3c2410 = { + .stride = 0x40, + .has_reqsel = false, + .has_clocks = false, +}; + +/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */ +static struct soc_data soc_s3c2412 = { + .stride = 0x40, + .has_reqsel = true, + .has_clocks = true, +}; + +/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */ +static struct soc_data soc_s3c2443 = { + .stride = 0x100, + .has_reqsel = true, + .has_clocks = true, +}; + +static const struct platform_device_id s3c24xx_dma_driver_ids[] = { + { + .name = "s3c2410-dma", + .driver_data = (kernel_ulong_t)&soc_s3c2410, + }, { + .name = "s3c2412-dma", + .driver_data = (kernel_ulong_t)&soc_s3c2412, + }, { + .name = "s3c2443-dma", + .driver_data = (kernel_ulong_t)&soc_s3c2443, + }, + { }, +}; + +static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev) +{ + return (struct soc_data *) + platform_get_device_id(pdev)->driver_data; +} + +static int s3c24xx_dma_probe(struct platform_device *pdev) +{ + const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev); + struct s3c24xx_dma_engine *s3cdma; + struct soc_data *sdata; + struct resource *res; + int ret; + int i; + + if (!pdata) { + dev_err(&pdev->dev, "platform data missing\n"); + return -ENODEV; + } + + /* Basic sanity check */ + if (pdata->num_phy_channels > MAX_DMA_CHANNELS) { + dev_err(&pdev->dev, "too many dma channels %d, max %d\n", + pdata->num_phy_channels, MAX_DMA_CHANNELS); + return -EINVAL; + } + + sdata = s3c24xx_dma_get_soc_data(pdev); + if (!sdata) + return -EINVAL; + + s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL); + if (!s3cdma) + return -ENOMEM; + + s3cdma->pdev = pdev; + s3cdma->pdata = pdata; + s3cdma->sdata = sdata; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + s3cdma->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(s3cdma->base)) + return PTR_ERR(s3cdma->base); + + s3cdma->phy_chans = devm_kcalloc(&pdev->dev, + pdata->num_phy_channels, + sizeof(struct s3c24xx_dma_phy), + GFP_KERNEL); + if (!s3cdma->phy_chans) + return -ENOMEM; + + /* acquire irqs and clocks for all physical channels */ + for (i = 0; i < pdata->num_phy_channels; i++) { + struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; + char clk_name[6]; + + phy->id = i; + phy->base = s3cdma->base + (i * sdata->stride); + phy->host = s3cdma; + + phy->irq = platform_get_irq(pdev, i); + if (phy->irq < 0) + continue; + + ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq, + 0, pdev->name, phy); + if (ret) { + dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n", + i, ret); + continue; + } + + if (sdata->has_clocks) { + sprintf(clk_name, "dma.%d", i); + phy->clk = devm_clk_get(&pdev->dev, clk_name); + if (IS_ERR(phy->clk) && sdata->has_clocks) { + dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n", + i, PTR_ERR(phy->clk)); + continue; + } + + ret = clk_prepare(phy->clk); + if (ret) { + dev_err(&pdev->dev, "clock for phy %d failed, error %d\n", + i, ret); + continue; + } + } + + spin_lock_init(&phy->lock); + phy->valid = true; + + 
dev_dbg(&pdev->dev, "physical channel %d is %s\n", + i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE"); + } + + /* Initialize memcpy engine */ + dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask); + dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask); + s3cdma->memcpy.dev = &pdev->dev; + s3cdma->memcpy.device_free_chan_resources = + s3c24xx_dma_free_chan_resources; + s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy; + s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status; + s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending; + s3cdma->memcpy.device_config = s3c24xx_dma_set_runtime_config; + s3cdma->memcpy.device_terminate_all = s3c24xx_dma_terminate_all; + s3cdma->memcpy.device_synchronize = s3c24xx_dma_synchronize; + + /* Initialize slave engine for SoC internal dedicated peripherals */ + dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask); + s3cdma->slave.dev = &pdev->dev; + s3cdma->slave.device_free_chan_resources = + s3c24xx_dma_free_chan_resources; + s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status; + s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending; + s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg; + s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic; + s3cdma->slave.device_config = s3c24xx_dma_set_runtime_config; + s3cdma->slave.device_terminate_all = s3c24xx_dma_terminate_all; + s3cdma->slave.device_synchronize = s3c24xx_dma_synchronize; + s3cdma->slave.filter.map = pdata->slave_map; + s3cdma->slave.filter.mapcnt = pdata->slavecnt; + s3cdma->slave.filter.fn = s3c24xx_dma_filter; + + /* Register as many memcpy channels as there are physical channels */ + ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy, + pdata->num_phy_channels, false); + if (ret <= 0) { + dev_warn(&pdev->dev, + "%s failed to enumerate memcpy channels - %d\n", + __func__, ret); + goto err_memcpy; + } + + /* Register slave channels */ + ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave, + pdata->num_channels, true); + if (ret <= 0) { + dev_warn(&pdev->dev, + "%s failed to enumerate slave channels - %d\n", + __func__, ret); + goto err_slave; + } + + ret = dma_async_device_register(&s3cdma->memcpy); + if (ret) { + dev_warn(&pdev->dev, + "%s failed to register memcpy as an async device - %d\n", + __func__, ret); + goto err_memcpy_reg; + } + + ret = dma_async_device_register(&s3cdma->slave); + if (ret) { + dev_warn(&pdev->dev, + "%s failed to register slave as an async device - %d\n", + __func__, ret); + goto err_slave_reg; + } + + platform_set_drvdata(pdev, s3cdma); + dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n", + pdata->num_phy_channels); + + return 0; + +err_slave_reg: + dma_async_device_unregister(&s3cdma->memcpy); +err_memcpy_reg: + s3c24xx_dma_free_virtual_channels(&s3cdma->slave); +err_slave: + s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy); +err_memcpy: + if (sdata->has_clocks) + for (i = 0; i < pdata->num_phy_channels; i++) { + struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; + if (phy->valid) + clk_unprepare(phy->clk); + } + + return ret; +} + +static void s3c24xx_dma_free_irq(struct platform_device *pdev, + struct s3c24xx_dma_engine *s3cdma) +{ + int i; + + for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) { + struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; + + devm_free_irq(&pdev->dev, phy->irq, phy); + } +} + +static int s3c24xx_dma_remove(struct platform_device 
*pdev) +{ + const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev); + struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev); + struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev); + int i; + + dma_async_device_unregister(&s3cdma->slave); + dma_async_device_unregister(&s3cdma->memcpy); + + s3c24xx_dma_free_irq(pdev, s3cdma); + + s3c24xx_dma_free_virtual_channels(&s3cdma->slave); + s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy); + + if (sdata->has_clocks) + for (i = 0; i < pdata->num_phy_channels; i++) { + struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i]; + if (phy->valid) + clk_unprepare(phy->clk); + } + + return 0; +} + +static struct platform_driver s3c24xx_dma_driver = { + .driver = { + .name = "s3c24xx-dma", + }, + .id_table = s3c24xx_dma_driver_ids, + .probe = s3c24xx_dma_probe, + .remove = s3c24xx_dma_remove, +}; + +module_platform_driver(s3c24xx_dma_driver); + +bool s3c24xx_dma_filter(struct dma_chan *chan, void *param) +{ + struct s3c24xx_dma_chan *s3cchan; + + if (chan->device->dev->driver != &s3c24xx_dma_driver.driver) + return false; + + s3cchan = to_s3c24xx_dma_chan(chan); + + return s3cchan->id == (uintptr_t)param; +} +EXPORT_SYMBOL(s3c24xx_dma_filter); + +MODULE_DESCRIPTION("S3C24XX DMA Driver"); +MODULE_AUTHOR("Heiko Stuebner"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h new file mode 100644 index 0000000000000..96d02dbeea67a --- /dev/null +++ b/include/linux/platform_data/dma-s3c24xx.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * S3C24XX DMA handling + * + * Copyright (c) 2013 Heiko Stuebner + */ + +/* Helper to encode the source selection constraints for early s3c socs. */ +#define S3C24XX_DMA_CHANREQ(src, chan) ((BIT(3) | src) << chan * 4) + +enum s3c24xx_dma_bus { + S3C24XX_DMA_APB, + S3C24XX_DMA_AHB, +}; + +/** + * @bus: on which bus does the peripheral reside - AHB or APB. + * @handshake: is a handshake with the peripheral necessary + * @chansel: channel selection information, depending on variant; reqsel for + * s3c2443 and later and channel-selection map for earlier SoCs + * see CHANSEL doc in s3c2443-dma.c + */ +struct s3c24xx_dma_channel { + enum s3c24xx_dma_bus bus; + bool handshake; + u16 chansel; +}; + +struct dma_slave_map; + +/** + * struct s3c24xx_dma_platdata - platform specific settings + * @num_phy_channels: number of physical channels + * @channels: array of virtual channel descriptions + * @num_channels: number of virtual channels + * @slave_map: dma slave map matching table + * @slavecnt: number of elements in slave_map + */ +struct s3c24xx_dma_platdata { + int num_phy_channels; + struct s3c24xx_dma_channel *channels; + int num_channels; + const struct dma_slave_map *slave_map; + int slavecnt; +}; + +struct dma_chan; +bool s3c24xx_dma_filter(struct dma_chan *chan, void *param); -- GitLab From af5d74e32eb8e1b833f687047f0ffe3801d7229d Mon Sep 17 00:00:00 2001 From: Xu Panda Date: Sat, 3 Dec 2022 14:10:56 +0800 Subject: [PATCH 520/694] m68k: use strscpy() to instead of strncpy() The implementation of strscpy() is more robust and safer. 
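For context, a minimal sketch of why the conversion is safe (illustrative only; the function and variable names below are invented, not taken from the m68k code):

        /* needs <linux/string.h> and <linux/printk.h> */
        static void copy_cmdline_example(char *dst, size_t dst_len, const char *src)
        {
                /*
                 * strncpy() leaves dst unterminated when src does not fit,
                 * hence the old manual "dst[dst_len - 1] = 0;" step.
                 * strscpy() always NUL-terminates and reports truncation:
                 */
                if (strscpy(dst, src, dst_len) == -E2BIG)
                        pr_warn("command line string was truncated\n");
        }
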
Signed-off-by: Xu Panda Signed-off-by: Yang Yang Signed-off-by: Greg Ungerer --- arch/m68k/kernel/setup_no.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index cb6def585851e..37fb663559b47 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -90,8 +90,7 @@ void __init setup_arch(char **cmdline_p) config_BSP(&command_line[0], sizeof(command_line)); #if defined(CONFIG_BOOTPARAM) - strncpy(&command_line[0], CONFIG_BOOTPARAM_STRING, sizeof(command_line)); - command_line[sizeof(command_line) - 1] = 0; + strscpy(&command_line[0], CONFIG_BOOTPARAM_STRING, sizeof(command_line)); #endif /* CONFIG_BOOTPARAM */ process_uboot_commandline(&command_line[0], sizeof(command_line)); -- GitLab From 8f4ab7da904ab7027ccd43ddb4f0094e932a5877 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 5 Dec 2022 12:44:27 +0400 Subject: [PATCH 521/694] selftests/powerpc: Fix resource leaks In check_all_cpu_dscr_defaults, opendir() opens the directory stream. Add missing closedir() in the error path to release it. In check_cpu_dscr_default, open() creates an open file descriptor. Add missing close() in the error path to release it. Fixes: ebd5858c904b ("selftests/powerpc: Add test for all DSCR sysfs interfaces") Signed-off-by: Miaoqian Lin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221205084429.570654-1-linmq006@gmail.com --- tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c index fbbdffdb2e5d2..f20d1c166d1e4 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -24,6 +24,7 @@ static int check_cpu_dscr_default(char *file, unsigned long val) rc = read(fd, buf, sizeof(buf)); if (rc == -1) { perror("read() failed"); + close(fd); return 1; } close(fd); @@ -65,8 +66,10 @@ static int check_all_cpu_dscr_defaults(unsigned long val) if (access(file, F_OK)) continue; - if (check_cpu_dscr_default(file, val)) + if (check_cpu_dscr_default(file, val)) { + closedir(sysfs); return 1; + } } closedir(sysfs); return 0; -- GitLab From dcb40e9fcce9bd251eaff19f3724131db522846c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:53 +0800 Subject: [PATCH 522/694] iommu/mediatek: Add platform_device_put for recovering the device refcnt Add platform_device_put() to match of_find_device_by_node(). Also add a new variable "pcommdev" for the smi-common device; otherwise using "platform_device_put(plarbdev)" for the smi-common device would be hard to read. Finally, add a check for whether pcommdev is NULL. 
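The underlying rule, as a rough sketch (not the driver's exact code; "node" is a placeholder for whatever device node was looked up): of_find_device_by_node() takes a reference on the returned platform device, so each successful lookup must eventually be balanced by platform_device_put().

        struct platform_device *pdev;

        pdev = of_find_device_by_node(node);    /* acquires a reference */
        of_node_put(node);
        if (!pdev)
                return -ENODEV;

        /* ... use pdev->dev (e.g. record it, create a device link) ... */

        platform_device_put(pdev);              /* drop the reference when done */
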
Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE") Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-2-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index c80f33dd2d434..cce948eead6a7 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1055,7 +1055,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m struct mtk_iommu_data *data) { struct device_node *larbnode, *smicomm_node, *smi_subcomm_node; - struct platform_device *plarbdev; + struct platform_device *plarbdev, *pcommdev; struct device_link *link; int i, larb_nr, ret; @@ -1086,12 +1086,14 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m } if (!plarbdev->dev.driver) { of_node_put(larbnode); + platform_device_put(plarbdev); return -EPROBE_DEFER; } data->larb_imu[id].dev = &plarbdev->dev; component_match_add_release(dev, match, component_release_of, component_compare_of, larbnode); + platform_device_put(plarbdev); } /* Get smi-(sub)-common dev from the last larb. */ @@ -1109,12 +1111,15 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m else smicomm_node = smi_subcomm_node; - plarbdev = of_find_device_by_node(smicomm_node); + pcommdev = of_find_device_by_node(smicomm_node); of_node_put(smicomm_node); - data->smicomm_dev = &plarbdev->dev; + if (!pcommdev) + return -ENODEV; + data->smicomm_dev = &pcommdev->dev; link = device_link_add(data->smicomm_dev, dev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); + platform_device_put(pcommdev); if (!link) { dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev)); return -EINVAL; -- GitLab From b5765a1b44bea9dfcae69c53ffeb4c689d0922a7 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:54 +0800 Subject: [PATCH 523/694] iommu/mediatek: Use component_match_add In order to simplify the error path (avoid calling of_node_put), use component_match_add instead of component_match_add_release, since we are only interested in the "device" here. Then we can always call of_node_put in the normal path. Strictly speaking this is not a fix, but it prepares for adding the error path, so a Fixes tag is added too. 
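Roughly, the difference between the two helpers (a sketch, not the driver's exact call sites): component_match_add_release() keeps the compared object alive until the match table is released, whereas component_match_add() only records the comparison data, so the caller can drop its own OF-node reference immediately.

        /* before: larbnode must stay referenced until the match is released */
        component_match_add_release(dev, match, component_release_of,
                                    component_compare_of, larbnode);

        /* after: compare by device pointer, so the node can be put right away */
        of_node_put(larbnode);
        component_match_add(dev, match, component_compare_dev, &plarbdev->dev);
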
Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE") Suggested-by: Robin Murphy Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-3-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index cce948eead6a7..df490236e1fb5 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1080,19 +1080,17 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m id = i; plarbdev = of_find_device_by_node(larbnode); - if (!plarbdev) { - of_node_put(larbnode); + of_node_put(larbnode); + if (!plarbdev) return -ENODEV; - } + if (!plarbdev->dev.driver) { - of_node_put(larbnode); platform_device_put(plarbdev); return -EPROBE_DEFER; } data->larb_imu[id].dev = &plarbdev->dev; - component_match_add_release(dev, match, component_release_of, - component_compare_of, larbnode); + component_match_add(dev, match, component_compare_dev, &plarbdev->dev); platform_device_put(plarbdev); } -- GitLab From 26593928564cf5b576ff05d3cbd958f57c9534bb Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:55 +0800 Subject: [PATCH 524/694] iommu/mediatek: Add error path for loop of mm_dts_parse mtk_iommu_mm_dts_parse() parses the smi larb nodes. If parsing larb i+1 fails, we should put_device() for larbs i..0. Two points are worth noting: 1) The larb ids may not map linearly, so the error path has to walk the whole array. 2) The line "data->larb_imu[id].dev = &plarbdev->dev;" is moved before "if (!plarbdev->dev.driver)", i.e. data->larb_imu[id].dev is set before the error path, so the probe-defer case no longer needs its own "platform_device_put(plarbdev)". All the error cases now rely on the "put_device" calls in the error path.
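Point 2 in condensed form (a paraphrase of the hunk that follows, not additional code): recording the device before the driver check lets the probe-defer case fall through to the common error path, which already drops the reference.

        data->larb_imu[id].dev = &plarbdev->dev;        /* recorded first */

        if (!plarbdev->dev.driver) {
                ret = -EPROBE_DEFER;
                goto err_larbdev_put;   /* put_device() happens in the error path */
        }
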
Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE") Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-4-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index df490236e1fb5..e0c669b75271d 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1067,8 +1067,10 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); - if (!larbnode) - return -EINVAL; + if (!larbnode) { + ret = -EINVAL; + goto err_larbdev_put; + } if (!of_device_is_available(larbnode)) { of_node_put(larbnode); @@ -1081,14 +1083,16 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m plarbdev = of_find_device_by_node(larbnode); of_node_put(larbnode); - if (!plarbdev) - return -ENODEV; + if (!plarbdev) { + ret = -ENODEV; + goto err_larbdev_put; + } + data->larb_imu[id].dev = &plarbdev->dev; if (!plarbdev->dev.driver) { - platform_device_put(plarbdev); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto err_larbdev_put; } - data->larb_imu[id].dev = &plarbdev->dev; component_match_add(dev, match, component_compare_dev, &plarbdev->dev); platform_device_put(plarbdev); @@ -1123,6 +1127,15 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m return -EINVAL; } return 0; + +err_larbdev_put: + /* id may be not linear mapping, loop whole the array */ + for (i = MTK_LARB_NR_MAX - 1; i >= 0; i--) { + if (!data->larb_imu[i].dev) + continue; + put_device(data->larb_imu[i].dev); + } + return ret; } static int mtk_iommu_probe(struct platform_device *pdev) -- GitLab From ef693a8440926884bfd9cc3d6d36f65719513350 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 18 Oct 2022 10:42:56 +0800 Subject: [PATCH 525/694] iommu/mediatek: Validate number of phandles associated with "mediatek,larbs" Fix the smatch warnings: drivers/iommu/mtk_iommu.c:878 mtk_iommu_mm_dts_parse() error: uninitialized symbol 'larbnode'. If someone abuses the dtsi node (does not follow the dt-binding definition), for example by providing "mediatek,larbs" as a boolean property, "larb_nr" will be zero and cause abnormal behaviour. To fix this problem and improve the code safety, add some checks for invalid input from the dtsi, e.g. checking that larb_nr and the larb ids are in a valid range, and avoiding "mediatek,larb-id" property conflicts in the smi-larb nodes.
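For clarity, the uninitialized-symbol report comes from the code after the larb loop (visible as context in the surrounding hunks of this series), which reads "mediatek,smi" from the last larb node; with a phandle count of zero the loop body never runs. A simplified sketch of the failure mode:

        struct device_node *larbnode;   /* never assigned when larb_nr == 0 */

        for (i = 0; i < larb_nr; i++) {
                larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
                /* ... */
        }

        /* code after the loop: larbnode may be used uninitialized */
        smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0);
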
Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Guenter Roeck Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-5-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e0c669b75271d..41e96da6160f3 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1062,6 +1062,8 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m larb_nr = of_count_phandle_with_args(dev->of_node, "mediatek,larbs", NULL); if (larb_nr < 0) return larb_nr; + if (larb_nr == 0 || larb_nr > MTK_LARB_NR_MAX) + return -EINVAL; for (i = 0; i < larb_nr; i++) { u32 id; @@ -1080,6 +1082,11 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); if (ret)/* The id is consecutive if there is no this property */ id = i; + if (id >= MTK_LARB_NR_MAX) { + of_node_put(larbnode); + ret = -EINVAL; + goto err_larbdev_put; + } plarbdev = of_find_device_by_node(larbnode); of_node_put(larbnode); @@ -1087,6 +1094,11 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m ret = -ENODEV; goto err_larbdev_put; } + if (data->larb_imu[id].dev) { + platform_device_put(plarbdev); + ret = -EEXIST; + goto err_larbdev_put; + } data->larb_imu[id].dev = &plarbdev->dev; if (!plarbdev->dev.driver) { -- GitLab From 6cde583d5352818a51985b32a960cdde85ab3821 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:57 +0800 Subject: [PATCH 526/694] iommu/mediatek: Improve safety for mediatek,smi property in larb nodes No functional change. Just improve safety from dts. All the larbs that connect to one IOMMU must connect with the same smi-common. This patch checks the mediatek,smi property of each larb; if their mediatek,smi properties differ, it returns an error. It also fails if there is no available smi-larb node.
Suggested-by: Guenter Roeck Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-6-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 53 +++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 41e96da6160f3..4a6ee25a6f99a 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1054,7 +1054,7 @@ static const struct component_master_ops mtk_iommu_com_ops = { static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **match, struct mtk_iommu_data *data) { - struct device_node *larbnode, *smicomm_node, *smi_subcomm_node; + struct device_node *larbnode, *frst_avail_smicomm_node = NULL; struct platform_device *plarbdev, *pcommdev; struct device_link *link; int i, larb_nr, ret; @@ -1066,6 +1066,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m return -EINVAL; for (i = 0; i < larb_nr; i++) { + struct device_node *smicomm_node, *smi_subcomm_node; u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); @@ -1106,27 +1107,47 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m goto err_larbdev_put; } + /* Get smi-(sub)-common dev from the last larb. */ + smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); + if (!smi_subcomm_node) { + ret = -EINVAL; + goto err_larbdev_put; + } + + /* + * It may have two level smi-common. the node is smi-sub-common if it + * has a new mediatek,smi property. otherwise it is smi-commmon. + */ + smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0); + if (smicomm_node) + of_node_put(smi_subcomm_node); + else + smicomm_node = smi_subcomm_node; + + /* + * All the larbs that connect to one IOMMU must connect with the same + * smi-common. + */ + if (!frst_avail_smicomm_node) { + frst_avail_smicomm_node = smicomm_node; + } else if (frst_avail_smicomm_node != smicomm_node) { + dev_err(dev, "mediatek,smi property is not right @larb%d.", id); + of_node_put(smicomm_node); + ret = -EINVAL; + goto err_larbdev_put; + } else { + of_node_put(smicomm_node); + } + component_match_add(dev, match, component_compare_dev, &plarbdev->dev); platform_device_put(plarbdev); } - /* Get smi-(sub)-common dev from the last larb. */ - smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); - if (!smi_subcomm_node) + if (!frst_avail_smicomm_node) return -EINVAL; - /* - * It may have two level smi-common. the node is smi-sub-common if it - * has a new mediatek,smi property. otherwise it is smi-commmon. - */ - smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0); - if (smicomm_node) - of_node_put(smi_subcomm_node); - else - smicomm_node = smi_subcomm_node; - - pcommdev = of_find_device_by_node(smicomm_node); - of_node_put(smicomm_node); + pcommdev = of_find_device_by_node(frst_avail_smicomm_node); + of_node_put(frst_avail_smicomm_node); if (!pcommdev) return -ENODEV; data->smicomm_dev = &pcommdev->dev; -- GitLab From 9ff894edd542618dad2fef538f8272c620a501db Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 18 Oct 2022 10:42:58 +0800 Subject: [PATCH 527/694] iommu/mediatek: Remove unused "mapping" member from mtk_iommu_data Just remove a unused variable that only is for mtk_iommu_v1. 
Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20221018024258.19073-7-yong.wu@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 4a6ee25a6f99a..9c1ce8b46a17b 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -229,10 +229,7 @@ struct mtk_iommu_data { struct device *smicomm_dev; struct mtk_iommu_bank_data *bank; - - struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ struct regmap *pericfg; - struct mutex mutex; /* Protect m4u_group/m4u_dom above */ /* -- GitLab From 88699c024f9227b79af90adc929625e4b7867932 Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Tue, 22 Nov 2022 14:18:25 -0800 Subject: [PATCH 528/694] iommu/amd: Fix typo in macro parameter name IVRS_GET_SBDF_ID is only called with fn as the fourth parameter, so this had no effect, but fixing the name will avoid bugs if that ever changes. Signed-off-by: Michael Forney Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/381fbc430c0ccdd78b3b696cfc0c32b233526ca5.1669159392.git.mforney@mforney.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 34029d1161073..467b194975b30 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -85,7 +85,7 @@ #define LOOP_TIMEOUT 2000000 -#define IVRS_GET_SBDF_ID(seg, bus, dev, fd) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ +#define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ | ((dev & 0x1f) << 3) | (fn & 0x7)) /* -- GitLab From ef5bb8e7a7127218f826b9ccdf7508e7a339f4c2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 10:06:28 -0400 Subject: [PATCH 529/694] iommu/sun50i: Remove IOMMU_DOMAIN_IDENTITY This driver treats IOMMU_DOMAIN_IDENTITY the same as UNMANAGED, which cannot possibly be correct. UNMANAGED domains are required to start out blocking all DMAs. This seems to be what this driver does as it allocates a first level 'dt' for the IO page table that is 0 filled. Thus UNMANAGED looks like a working IO page table, and so IDENTITY must be a mistake. Remove it. Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver") Signed-off-by: Jason Gunthorpe Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/0-v1-97f0adf27b5e+1f0-s50_identity_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 5cb2d44dfb92d..5b585eace3d46 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -672,7 +672,6 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) struct sun50i_iommu_domain *sun50i_domain; if (type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_UNMANAGED) return NULL; -- GitLab From 00ef8885a945c37551547d8ac8361cacd20c4e42 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 28 Nov 2022 23:16:48 +0100 Subject: [PATCH 530/694] iommu/mediatek: Fix crash on isr after kexec() If the system is rebooted via isr(), the IRQ handler might be triggered before the domain is initialized. Resulting on an invalid memory access error. 
Fix: [ 0.500930] Unable to handle kernel read from unreadable memory at virtual address 0000000000000070 [ 0.501166] Call trace: [ 0.501174] report_iommu_fault+0x28/0xfc [ 0.501180] mtk_iommu_isr+0x10c/0x1c0 Signed-off-by: Ricardo Ribalda Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20221125-mtk-iommu-v2-0-e168dff7d43e@chromium.org [ joro: Fixed spelling in commit message ] Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 9c1ce8b46a17b..392b8c167c44c 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -462,7 +462,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm]; } - if (report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova, + if (!dom || report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova, write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { dev_err_ratelimited( bank->parent_dev, -- GitLab From 6aecc0a59e07ba895b5473e0c916ba5f3d556c15 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 5 Dec 2022 16:32:31 -0600 Subject: [PATCH 531/694] cxl: Remove unnecessary cxl_pci_window_alignment() cxl_pci_window_alignment() is referenced only via the struct pci_controller_ops.window_alignment function pointer, and only in the powerpc implementation of pcibios_window_alignment(). pcibios_window_alignment() defaults to returning 1 if the function pointer is NULL, which is the same was what cxl_pci_window_alignment() does. cxl_pci_window_alignment() is unnecessary, so remove it. No functional change intended. Signed-off-by: Bjorn Helgaas Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221205223231.1268085-1-helgaas@kernel.org --- drivers/misc/cxl/vphb.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 1264253cc07bc..6332db8044bd6 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -67,12 +67,6 @@ static void cxl_pci_disable_device(struct pci_dev *dev) } } -static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, - unsigned long type) -{ - return 1; -} - static void cxl_pci_reset_secondary_bus(struct pci_dev *dev) { /* Should we do an AFU reset here ? */ @@ -200,7 +194,6 @@ static struct pci_controller_ops cxl_pci_controller_ops = .enable_device_hook = cxl_pci_enable_device_hook, .disable_device = cxl_pci_disable_device, .release_device = cxl_pci_disable_device, - .window_alignment = cxl_pci_window_alignment, .reset_secondary_bus = cxl_pci_reset_secondary_bus, .setup_msi_irqs = cxl_setup_msi_irqs, .teardown_msi_irqs = cxl_teardown_msi_irqs, -- GitLab From 3ae7c96dd51025550c8001c6f833337f11d00807 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Dec 2022 17:49:26 +0100 Subject: [PATCH 532/694] powerpc/dts/fsl: Fix pca954x i2c-mux node names "make dtbs_check": arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dtb: pca9546@77: $nodename:0: 'pca9546@77' does not match '^(i2c-?)?mux' From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml arch/powerpc/boot/dts/fsl/t1024qds.dtb: pca9547@77: Unevaluated properties are not allowed ('#address-cells', '#size-cells', 'i2c@0', 'i2c@2', 'i2c@3' were unexpected) From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml ... 
Fix this by renaming pca954x nodes to "i2c-mux", to match the I2C bus multiplexer/switch DT bindings and the Generic Names Recommendation in the Devicetree Specification. Signed-off-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6c5d86c49ac170e9d56ab121ea0602f3873849ca.1669999298.git.geert+renesas@glider.be --- arch/powerpc/boot/dts/fsl/t1024qds.dts | 2 +- arch/powerpc/boot/dts/fsl/t1024rdb.dts | 2 +- arch/powerpc/boot/dts/fsl/t104xqds.dtsi | 2 +- arch/powerpc/boot/dts/fsl/t104xrdb.dtsi | 2 +- arch/powerpc/boot/dts/fsl/t208xqds.dtsi | 2 +- arch/powerpc/boot/dts/fsl/t208xrdb.dtsi | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts index d6858b7cd93fb..9ea7942f914e1 100644 --- a/arch/powerpc/boot/dts/fsl/t1024qds.dts +++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts @@ -151,7 +151,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index dbcd31cc35dcd..270aaf631f2ab 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -165,7 +165,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi index 6154797322524..1c329f076f64f 100644 --- a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi @@ -268,7 +268,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index bfe1ed5be3374..fc7bec5dcb90f 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -128,7 +128,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi index db4139999b28e..962c99941645b 100644 --- a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi +++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi @@ -135,7 +135,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi index ff87e67c70da4..ecc3e8c7394ca 100644 --- a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi @@ -138,7 +138,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; }; -- GitLab From 5ddcc03a07ae1ab5062f89a946d9495f1fd8eaa4 Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Mon, 14 Nov 2022 20:26:11 +0530 Subject: [PATCH 533/694] powerpc/cpuidle: Set CPUIDLE_FLAG_POLLING for snooze state During the comparative study of cpuidle governors, it is noticed that the menu governor does not select CEDE state in some scenarios even though when the sleep duration of the CPU exceeds the target residency of the CEDE idle state this is because the CPU exits the snooze "polling" state when snooze time limit is reached in the snooze_loop(), which is not a real wake up and it just means that the polling state selection was not adequate. 
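For illustration, a condensed sketch of the pattern this change applies (the authoritative version is in the driver diff further below): the snooze state is flagged as a polling state, and the idle loop records when it gave up because of its own time limit rather than a genuine wakeup.

    .flags = CPUIDLE_FLAG_POLLING,          /* snooze is a polling state */

    dev->poll_time_limit = false;
    while (!need_resched()) {
        if (get_tb() > snooze_exit_time) {
            /* timeout, not a real wakeup */
            dev->poll_time_limit = true;
            break;
        }
        HMT_low();
    }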
cpuidle governors rely on CPUIDLE_FLAG_POLLING flag to be set for the polling states to handle the condition mentioned above. Hence, set the CPUIDLE_FLAG_POLLING flag for snooze state (polling state) in powerpc arch to make the cpuidle governor work as expected. Reference Commits: - Timeout enabled for snooze state: commit 78eaa10f027c ("cpuidle: powernv/pseries: Auto-promotion of snooze to deeper idle state") - commit dc2251bf98c6 ("cpuidle: Eliminate the CPUIDLE_DRIVER_STATE_START symbol") - Fix wakeup stats in governor for polling states commit 5f26bdceb9c0 ("cpuidle: menu: Fix wakeup statistics updates for polling state") Signed-off-by: Aboorva Devarajan Tested-by: Vishal Chourasia Reviewed-by: Vaidyanathan Srinivasan Reviewed-by: Vishal Chourasia Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114145611.37669-1-aboorvad@linux.vnet.ibm.com --- drivers/cpuidle/cpuidle-powernv.c | 5 ++++- drivers/cpuidle/cpuidle-pseries.c | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 0b5461b3d7dd4..9ebedd972df0b 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -76,6 +76,7 @@ static int snooze_loop(struct cpuidle_device *dev, local_irq_enable(); snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index); + dev->poll_time_limit = false; ppc64_runlatch_off(); HMT_very_low(); while (!need_resched()) { @@ -86,6 +87,7 @@ static int snooze_loop(struct cpuidle_device *dev, * cleared to order subsequent test of need_resched(). */ clear_thread_flag(TIF_POLLING_NRFLAG); + dev->poll_time_limit = true; smp_mb(); break; } @@ -155,7 +157,8 @@ static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = { .desc = "snooze", .exit_latency = 0, .target_residency = 0, - .enter = snooze_loop }, + .enter = snooze_loop, + .flags = CPUIDLE_FLAG_POLLING }, }; static int powernv_cpuidle_cpu_online(unsigned int cpu) diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 7e7ab5597d7ac..1bad4d2b7be33 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -44,6 +44,7 @@ static int snooze_loop(struct cpuidle_device *dev, pseries_idle_prolog(); local_irq_enable(); snooze_exit_time = get_tb() + snooze_timeout; + dev->poll_time_limit = false; while (!need_resched()) { HMT_low(); @@ -54,6 +55,7 @@ static int snooze_loop(struct cpuidle_device *dev, * loop anyway. Require a barrier after polling is * cleared to order subsequent test of need_resched(). */ + dev->poll_time_limit = true; clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb(); break; @@ -268,7 +270,8 @@ static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = { .desc = "snooze", .exit_latency = 0, .target_residency = 0, - .enter = &snooze_loop }, + .enter = &snooze_loop, + .flags = CPUIDLE_FLAG_POLLING }, { /* CEDE */ .name = "CEDE", .desc = "CEDE", @@ -286,7 +289,8 @@ static struct cpuidle_state shared_states[] = { .desc = "snooze", .exit_latency = 0, .target_residency = 0, - .enter = &snooze_loop }, + .enter = &snooze_loop, + .flags = CPUIDLE_FLAG_POLLING }, { /* Shared Cede */ .name = "Shared Cede", .desc = "Shared Cede", -- GitLab From e13d23a404f2e6dfaf8b1ef7d161a0836fce4fa5 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 10 Nov 2022 19:06:18 +0100 Subject: [PATCH 534/694] powerpc: export the CPU node count At boot time, the FDT is parsed to compute the number of CPUs. 
In addition count the number of CPU nodes and export it. This is useful when building the FDT for a kexeced kernel since we need to take in account the CPU node added since the boot time during CPU hotplug operations. Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221110180619.15796-2-ldufour@linux.ibm.com --- arch/powerpc/include/asm/prom.h | 1 + arch/powerpc/kernel/prom.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 2e82820fbd64c..c0107d8ddd8c8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -85,6 +85,7 @@ struct of_drc_info { extern int of_read_drc_info_cell(struct property **prop, const __be32 **curval, struct of_drc_info *data); +extern unsigned int boot_cpu_node_count; /* * There are two methods for telling firmware what our capabilities are. diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 1eed87d954ba8..645f4450dfc38 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -72,6 +72,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; +unsigned int boot_cpu_node_count __ro_after_init; #endif static phys_addr_t first_memblock_size; static int __initdata boot_cpu_count; @@ -335,6 +336,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; + boot_cpu_node_count++; + /* Get physical cpuid */ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); if (!intserv) -- GitLab From 340a4a9f8773e102cc5ef531665970a686dfa245 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 10 Nov 2022 19:06:19 +0100 Subject: [PATCH 535/694] powerpc: Take in account addition CPU node when building kexec FDT On a system with a large number of CPUs, the creation of the FDT for a kexec kernel may fail because the allocated FDT is not large enough. When this happens, such a message is displayed on the console: Unable to add ibm,processor-vadd-size property: FDT_ERR_NOSPACE The property's name may change depending when the buffer overwrite is detected. Obviously the created FDT is missing information, and it is expected that system dump or kexec kernel failed to run properly. When the FDT is allocated, the size of the FDT the kernel received at boot time is used and an extra size can be applied. Currently, only memory added after boot time is taken in account, not the CPU nodes. The extra size should take in account these additional CPU nodes and compute the required extra space. To achieve that, the size of a CPU node, including its subnode is computed once and multiplied by the number of additional CPU nodes. The assumption is that the size of the CPU node is _same_ for all the node, the only variable part should be the name "PowerPC,POWERxx@##" where "##" may vary a little. 
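As a rough worked example of the estimate (the per-node size here is purely illustrative): a partition that booted with 16 CPU nodes and has 64 present at kexec load time, where each cpu node serializes to about 1 KiB of name plus properties, needs roughly (64 - 16) * 1 KiB = 48 KiB of additional FDT headroom. The core of the sizing logic added below is:

    cpu_nodes = 0;
    for_each_node_by_type(dn, "cpu")
        cpu_nodes++;

    if (cpu_nodes > boot_cpu_node_count)
        extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();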
Signed-off-by: Laurent Dufour [mpe: Don't shadow function name w/variable, minor coding style changes] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221110180619.15796-3-ldufour@linux.ibm.com --- arch/powerpc/kexec/file_load_64.c | 59 ++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 349a781cea0b3..2500c37c628c6 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -26,6 +26,7 @@ #include #include #include +#include struct umem_info { u64 *buf; /* data buffer for usable-memory property */ @@ -928,6 +929,45 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, return ret; } +/** + * get_cpu_node_size - Compute the size of a CPU node in the FDT. + * This should be done only once and the value is stored in + * a static variable. + * Returns the max size of a CPU node in the FDT. + */ +static unsigned int cpu_node_size(void) +{ + static unsigned int size; + struct device_node *dn; + struct property *pp; + + /* + * Don't compute it twice, we are assuming that the per CPU node size + * doesn't change during the system's life. + */ + if (size) + return size; + + dn = of_find_node_by_type(NULL, "cpu"); + if (WARN_ON_ONCE(!dn)) { + // Unlikely to happen + return 0; + } + + /* + * We compute the sub node size for a CPU node, assuming it + * will be the same for all. + */ + size += strlen(dn->name) + 5; + for_each_property_of_node(dn, pp) { + size += strlen(pp->name); + size += pp->length; + } + + of_node_put(dn); + return size; +} + /** * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to * setup FDT for kexec/kdump kernel. @@ -937,6 +977,8 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, */ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) { + unsigned int cpu_nodes, extra_size; + struct device_node *dn; u64 usm_entries; if (image->type != KEXEC_TYPE_CRASH) @@ -949,7 +991,22 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) */ usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) + (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); - return (unsigned int)(usm_entries * sizeof(u64)); + + extra_size = (unsigned int)(usm_entries * sizeof(u64)); + + /* + * Get the number of CPU nodes in the current DT. This allows to + * reserve places for CPU nodes added since the boot time. + */ + cpu_nodes = 0; + for_each_node_by_type(dn, "cpu") { + cpu_nodes++; + } + + if (cpu_nodes > boot_cpu_node_count) + extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size(); + + return extra_size; } /** -- GitLab From 9b574cfab7d4e68c67c4ee4fcde912ef54a25b88 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 25 Nov 2022 18:32:04 +0100 Subject: [PATCH 536/694] powerpc/pseries: reset the RCU watchdogs after a LPM The RCU watchdog timer should be reset when restarting the CPU after a Live Partition Mobility operation. 
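A minimal sketch of the resume-side fixup (the full context is in the hunk below): touch_nmi_watchdog() also pets the soft-lockup detector, but RCU's stall timer is separate and needs its own reset after the long suspension.

    /* Execution may have been suspended for several seconds. */
    rcu_cpu_stall_reset();          /* RCU stall detector */
    touch_nmi_watchdog();           /* hard and soft lockup detectors */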
Signed-off-by: Laurent Dufour Acked-by: Nicholas Piggin [mpe: Combine comments into a single comment block] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221125173204.15329-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 634fac5db3f98..4cea71aa0f41d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -635,10 +635,13 @@ static int do_join(void *arg) prod_others(); } /* - * Execution may have been suspended for several seconds, so - * reset the watchdog. + * Execution may have been suspended for several seconds, so reset + * the watchdogs. touch_nmi_watchdog() also touches the soft lockup + * watchdog. */ + rcu_cpu_stall_reset(); touch_nmi_watchdog(); + return ret; } -- GitLab From f6aa37c51ec0d053ee34c235bfe0e666618a3baf Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 14 Nov 2022 17:01:50 +0100 Subject: [PATCH 537/694] powerpc/pseries: unregister VPA when hot unplugging a CPU The VPA should unregister when offlining a CPU. Otherwise there could be a short window where 2 CPUs could share the same VPA. This happens because the hypervisor is still keeping the VPA attached to the vCPU even if it became offline. Here is a potential situation: 1. remove proc A, 2. add proc B. If proc B gets proc A's place in cpu_present_mask, then it registers proc A's VPAs. 3. If proc B is then re-added to the LP, its threads are sharing VPAs with proc A briefly as they come online. As the hypervisor may check for the VPA's yield_count field oddity, it may detect an unexpected value and kill the LPAR. Suggested-by: Nathan Lynch Signed-off-by: Laurent Dufour Reviewed-by: Nathan Lynch [mpe: s/cpu_present_map/cpu_present_mask/ in change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221114160150.13554-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index e0a7ac5db15d9..090ae5a1e0f5e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -70,6 +70,7 @@ static void pseries_cpu_offline_self(void) xics_teardown_cpu(); unregister_slb_shadow(hwcpu); + unregister_vpa(hwcpu); rtas_stop_self(); /* Should never get here... */ -- GitLab From 336e2554ec99eb97616004c791ee89abe96bdab2 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:39 -0600 Subject: [PATCH 538/694] powerpc/rtas: document rtas_call() rtas_call() has a complex calling convention, non-standard return values, and many users. Add kernel-doc for it and remove the less structured commentary from rtas.h. 
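For readers new to the convention being documented, a minimal usage sketch; the "get-time-of-day" service and the eight-word return buffer follow existing callers, and the helper name and error translation are illustrative rather than prescriptive.

    static int example_read_rtc(void)       /* illustrative helper */
    {
        int ret[8], status;
        int token = rtas_token("get-time-of-day");

        if (token == RTAS_UNKNOWN_SERVICE)
            return -ENODEV;

        do {
            status = rtas_call(token, 0, 8, ret);
        } while (rtas_busy_delay(status));  /* retries -2 and 990x statuses */

        return status == 0 ? 0 : -EIO;      /* translate status to an errno */
    }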
Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-2-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 15 --------- arch/powerpc/kernel/rtas.c | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 56319aea646e6..479a95cb27701 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -33,21 +33,6 @@ #define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */ #define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */ -/* - * In general to call RTAS use rtas_token("string") to lookup - * an RTAS token for the given string (e.g. "event-scan"). - * To actually perform the call use - * ret = rtas_call(token, n_in, n_out, ...) - * Where n_in is the number of input parameters and - * n_out is the number of output parameters - * - * If the "string" is invalid on this system, RTAS_UNKNOWN_SERVICE - * will be returned as a token. rtas_call() does look for this - * token and error out gracefully so rtas_call(rtas_token("str"), ...) - * may be safely used for one-shot calls to RTAS. - * - */ - /* RTAS event classes */ #define RTAS_INTERNAL_ERROR 0x80000000 /* set bit 0 */ #define RTAS_EPOW_WARNING 0x40000000 /* set bit 1 */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index e847f9b1c5b9f..c12dd5ed5e001 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -467,6 +467,64 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, static int ibm_open_errinjct_token; static int ibm_errinjct_token; +/** + * rtas_call() - Invoke an RTAS firmware function. + * @token: Identifies the function being invoked. + * @nargs: Number of input parameters. Does not include token. + * @nret: Number of output parameters, including the call status. + * @outputs: Array of @nret output words. + * @....: List of @nargs input parameters. + * + * Invokes the RTAS function indicated by @token, which the caller + * should obtain via rtas_token(). + * + * The @nargs and @nret arguments must match the number of input and + * output parameters specified for the RTAS function. + * + * rtas_call() returns RTAS status codes, not conventional Linux errno + * values. Callers must translate any failure to an appropriate errno + * in syscall context. Most callers of RTAS functions that can return + * -2 or 990x should use rtas_busy_delay() to correctly handle those + * statuses before calling again. + * + * The return value descriptions are adapted from 7.2.8 [RTAS] Return + * Codes of the PAPR and CHRP specifications. + * + * Context: Process context preferably, interrupt context if + * necessary. Acquires an internal spinlock and may perform + * GFP_ATOMIC slab allocation in error path. Unsafe for NMI + * context. + * Return: + * * 0 - RTAS function call succeeded. + * * -1 - RTAS function encountered a hardware or + * platform error, or the token is invalid, + * or the function is restricted by kernel policy. + * * -2 - Specs say "A necessary hardware device was busy, + * and the requested function could not be + * performed. The operation should be retried at + * a later time." This is misleading, at least with + * respect to current RTAS implementations. 
What it + * usually means in practice is that the function + * could not be completed while meeting RTAS's + * deadline for returning control to the OS (250us + * for PAPR/PowerVM, typically), but the call may be + * immediately reattempted to resume work on it. + * * -3 - Parameter error. + * * -7 - Unexpected state change. + * * 9000...9899 - Vendor-specific success codes. + * * 9900...9905 - Advisory extended delay. Caller should try + * again after ~10^x ms has elapsed, where x is + * the last digit of the status [0-5]. Again going + * beyond the PAPR text, 990x on PowerVM indicates + * contention for RTAS-internal resources. Other + * RTAS call sequences in progress should be + * allowed to complete before reattempting the + * call. + * * -9000 - Multi-level isolation error. + * * -9999...-9004 - Vendor-specific error codes. + * * Additional negative values - Function-specific error. + * * Additional positive values - Function-specific success. + */ int rtas_call(int token, int nargs, int nret, int *outputs, ...) { va_list list; -- GitLab From b10af504a2015d12c566b6b0a4c7e3b602949eeb Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:40 -0600 Subject: [PATCH 539/694] powerpc/rtasd: use correct OF API for event scan rate rtas_token() should be used only for properties that are RTAS function tokens. "rtas-event-scan-rate" does not contain a function token, but it has the same size/format as token properties so reading it with rtas_token() happens to work. Convert to of_property_read_u32(). Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-3-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtasd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 5270b450bbde4..cc56ac6ba4b04 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -499,6 +500,8 @@ EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); static int __init rtas_event_scan_init(void) { + int err; + if (!machine_is(pseries) && !machine_is(chrp)) return 0; @@ -509,8 +512,8 @@ static int __init rtas_event_scan_init(void) return -ENODEV; } - rtas_event_scan_rate = rtas_token("rtas-event-scan-rate"); - if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) { + err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate); + if (err) { printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n"); return -ENODEV; } -- GitLab From ed2213bfb192ab51f09f12e9b49b5d482c6493f3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:41 -0600 Subject: [PATCH 540/694] powerpc/rtas: avoid device tree lookups in rtas_os_term() rtas_os_term() is called during panic. Its behavior depends on a couple of conditions in the /rtas node of the device tree, the traversal of which entails locking and local IRQ state changes. If the kernel panics while devtree_lock is held, rtas_os_term() as currently written could hang. Instead of discovering the relevant characteristics at panic time, cache them in file-static variables at boot. Note the lookup for "ibm,extended-os-term" is converted to of_property_read_bool() since it is a boolean property, not an RTAS function token. 
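Reduced to its essentials, the pattern is "resolve at boot, consume at panic time"; the hunks below are authoritative, this is only a sketch.

    /* Resolved once in rtas_initialize(), while DT access is safe. */
    static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE;

    if (of_property_read_bool(rtas.dev, "ibm,extended-os-term"))
        ibm_os_term_token = rtas_token("ibm,os-term");

    /* rtas_os_term(), on the panic path: no DT walk, no devtree_lock. */
    if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE)
        return;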
Signed-off-by: Nathan Lynch Reviewed-by: Nicholas Piggin Reviewed-by: Andrew Donnellan [mpe: Incorporate suggested change from Nick] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-4-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c12dd5ed5e001..db43cbdcc74ca 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -947,6 +947,7 @@ void __noreturn rtas_halt(void) /* Must be in the RMO region, so we place it here */ static char rtas_os_term_buf[2048]; +static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE; void rtas_os_term(char *str) { @@ -958,14 +959,13 @@ void rtas_os_term(char *str) * this property may terminate the partition which we want to avoid * since it interferes with panic_timeout. */ - if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || - RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) + if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); do { - status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL, + status = rtas_call(ibm_os_term_token, 1, 1, NULL, __pa(rtas_os_term_buf)); } while (rtas_busy_delay(status)); @@ -1335,6 +1335,13 @@ void __init rtas_initialize(void) no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); rtas.entry = no_entry ? rtas.base : entry; + /* + * Discover these now to avoid device tree lookups in the + * panic path. + */ + if (of_property_read_bool(rtas.dev, "ibm,extended-os-term")) + ibm_os_term_token = rtas_token("ibm,os-term"); + /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any */ -- GitLab From 6c606e57eecc37d6b36d732b1ff7e55b7dc32dd4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:42 -0600 Subject: [PATCH 541/694] powerpc/rtas: avoid scheduling in rtas_os_term() It's unsafe to use rtas_busy_delay() to handle a busy status from the ibm,os-term RTAS function in rtas_os_term(): Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b BUG: sleeping function called from invalid context at arch/powerpc/kernel/rtas.c:618 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0 preempt_count: 2, expected: 0 CPU: 7 PID: 1 Comm: swapper/0 Tainted: G D 6.0.0-rc5-02182-gf8553a572277-dirty #9 Call Trace: [c000000007b8f000] [c000000001337110] dump_stack_lvl+0xb4/0x110 (unreliable) [c000000007b8f040] [c0000000002440e4] __might_resched+0x394/0x3c0 [c000000007b8f0e0] [c00000000004f680] rtas_busy_delay+0x120/0x1b0 [c000000007b8f100] [c000000000052d04] rtas_os_term+0xb8/0xf4 [c000000007b8f180] [c0000000001150fc] pseries_panic+0x50/0x68 [c000000007b8f1f0] [c000000000036354] ppc_panic_platform_handler+0x34/0x50 [c000000007b8f210] [c0000000002303c4] notifier_call_chain+0xd4/0x1c0 [c000000007b8f2b0] [c0000000002306cc] atomic_notifier_call_chain+0xac/0x1c0 [c000000007b8f2f0] [c0000000001d62b8] panic+0x228/0x4d0 [c000000007b8f390] [c0000000001e573c] do_exit+0x140c/0x1420 [c000000007b8f480] [c0000000001e586c] make_task_dead+0xdc/0x200 Use rtas_busy_delay_time() instead, which signals without side effects whether to attempt the ibm,os-term RTAS call again. 
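The distinction between the two helpers, in sketch form: rtas_busy_delay() may msleep() for extended-delay (990x) statuses and so is only safe in process context, while rtas_busy_delay_time() merely classifies the status and leaves any waiting to the caller.

    do {
        status = rtas_call(ibm_os_term_token, 1, 1, NULL,
                           __pa(rtas_os_term_buf));
    } while (rtas_busy_delay_time(status)); /* nonzero for -2/990x, never sleeps */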
Signed-off-by: Nathan Lynch Reviewed-by: Nicholas Piggin Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-5-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index db43cbdcc74ca..f21b39fcaf996 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -964,10 +964,15 @@ void rtas_os_term(char *str) snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); + /* + * Keep calling as long as RTAS returns a "try again" status, + * but don't use rtas_busy_delay(), which potentially + * schedules. + */ do { status = rtas_call(ibm_os_term_token, 1, 1, NULL, __pa(rtas_os_term_buf)); - } while (rtas_busy_delay(status)); + } while (rtas_busy_delay_time(status)); if (status != 0) printk(KERN_EMERG "ibm,os-term call failed %d\n", status); -- GitLab From 9aafbfa5f57a4b75bafd3bed0191e8429c5fa618 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:43 -0600 Subject: [PATCH 542/694] powerpc/pseries/eeh: use correct API for error log size rtas-error-log-max is not the name of an RTAS function, so rtas_token() is not the appropriate API for retrieving its value. We already have rtas_get_error_log_max() which returns a sensible value if the property is absent for any reason, so use that instead. Fixes: 8d633291b4fc ("powerpc/eeh: pseries platform EEH error log retrieval") Signed-off-by: Nathan Lynch [mpe: Drop no-longer possible error handling as noticed by ajd] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-6-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/eeh_pseries.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index ea890037843c4..6b507b62ce8f1 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -848,16 +848,7 @@ static int __init eeh_pseries_init(void) } /* Initialize error log size */ - eeh_error_buf_size = rtas_token("rtas-error-log-max"); - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_info("%s: unknown EEH error log size\n", - __func__); - eeh_error_buf_size = 1024; - } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_info("%s: EEH error log size %d exceeds the maximal %d\n", - __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; - } + eeh_error_buf_size = rtas_get_error_log_max(); /* Set EEH probe mode */ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); -- GitLab From c67a0e411d0ffe0648fe84e25e9f899ce770feb3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:44 -0600 Subject: [PATCH 543/694] powerpc/rtas: clean up rtas_error_log_max initialization The code in rtas_get_error_log_max() doesn't cause problems in practice, but there are no measures to ensure that the lazy initialization of the static rtas_error_log_max variable is atomic, and it's not worth adding them. Initialize the static rtas_error_log_max variable at boot when we're single-threaded instead of lazily on first use. Use the more appropriate of_property_read_u32() API instead of rtas_token() to consult the "rtas-error-log-max" property, which is not the name of an RTAS function. Convert use of printk() to pr_warn() and distinguish the possible error cases. 
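Condensed, the boot-time initialization reads the property once and clamps it (see the hunk below for the real code):

    u32 max;

    if (of_property_read_u32(rtas.dev, "rtas-error-log-max", &max))
        max = RTAS_ERROR_LOG_MAX;       /* property absent: use default */
    if (max > RTAS_ERROR_LOG_MAX)
        max = RTAS_ERROR_LOG_MAX;       /* clamp oversized firmware value */
    rtas_error_log_max = max;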
Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-7-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index f21b39fcaf996..a1b637259e3db 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -353,6 +353,9 @@ int rtas_service_present(const char *service) EXPORT_SYMBOL(rtas_service_present); #ifdef CONFIG_RTAS_ERROR_LOGGING + +static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX; + /* * Return the firmware-specified size of the error log buffer * for all rtas calls that require an error buffer argument. @@ -360,21 +363,30 @@ EXPORT_SYMBOL(rtas_service_present); */ int rtas_get_error_log_max(void) { - static int rtas_error_log_max; - if (rtas_error_log_max) - return rtas_error_log_max; - - rtas_error_log_max = rtas_token ("rtas-error-log-max"); - if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) || - (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) { - printk (KERN_WARNING "RTAS: bad log buffer size %d\n", - rtas_error_log_max); - rtas_error_log_max = RTAS_ERROR_LOG_MAX; - } return rtas_error_log_max; } EXPORT_SYMBOL(rtas_get_error_log_max); +static void __init init_error_log_max(void) +{ + static const char propname[] __initconst = "rtas-error-log-max"; + u32 max; + + if (of_property_read_u32(rtas.dev, propname, &max)) { + pr_warn("%s not found, using default of %u\n", + propname, RTAS_ERROR_LOG_MAX); + max = RTAS_ERROR_LOG_MAX; + } + + if (max > RTAS_ERROR_LOG_MAX) { + pr_warn("%s = %u, clamping max error log size to %u\n", + propname, max, RTAS_ERROR_LOG_MAX); + max = RTAS_ERROR_LOG_MAX; + } + + rtas_error_log_max = max; +} + static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; static int rtas_last_error_token; @@ -432,6 +444,7 @@ static char *__fetch_rtas_last_error(char *altbuf) #else /* CONFIG_RTAS_ERROR_LOGGING */ #define __fetch_rtas_last_error(x) NULL #define get_errorlog_buffer() NULL +static void __init init_error_log_max(void) {} #endif @@ -1340,6 +1353,8 @@ void __init rtas_initialize(void) no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); rtas.entry = no_entry ? rtas.base : entry; + init_error_log_max(); + /* * Discover these now to avoid device tree lookups in the * panic path. -- GitLab From 9581f8a00777a073fdd8146659a51ca007cae8d6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:45 -0600 Subject: [PATCH 544/694] powerpc/rtas: clean up includes rtas.c used to host complex code related to pseries-specific guest migration and suspend, which used atomics, completions, hcalls, and CPU hotplug APIs. That's all been deleted or moved, so remove the include directives that have been rendered unnecessary. Sort the remainder (with linux/ before asm/) to impose some order on where future additions go. Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-8-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 42 +++++++++++++++----------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index a1b637259e3db..5ce17b4bd7ebb 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -7,43 +7,33 @@ * Copyright (C) 2001 IBM. 
*/ -#include -#include -#include -#include -#include -#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include #include -#include +#include +#include #include +#include #include +#include +#include +#include #include -#include -#include +#include +#include +#include +#include #include -#include -#include #include -#include +#include #include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include +#include /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); -- GitLab From f975b6559bac510f1b1b39637997bb240f0a9969 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:46 -0600 Subject: [PATCH 545/694] powerpc/rtas: define pr_fmt and convert printk call sites Set pr_fmt to "rtas: " and convert the handful of printk() uses in rtas.c, adjusting the messages to remove now-redundant "RTAS" strings. Note that rtas_restart(), rtas_power_off(), and rtas_halt() all currently use printk() without specifying a log level. These have been changed to use pr_emerg(), which matches the behavior of rtas_os_term(). Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-9-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 5ce17b4bd7ebb..10c19228aaa37 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -7,6 +7,8 @@ * Copyright (C) 2001 IBM. */ +#define pr_fmt(fmt) "rtas: " fmt + #include #include #include @@ -718,8 +720,7 @@ static int rtas_error_rc(int rtas_rc) rc = -ENODEV; break; default: - printk(KERN_ERR "%s: unexpected RTAS error %d\n", - __func__, rtas_rc); + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); rc = -ERANGE; break; } @@ -923,8 +924,8 @@ void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); - printk("RTAS system-reboot returned %d\n", - rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + pr_emerg("system-reboot returned %d\n", + rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); for (;;); } @@ -933,8 +934,8 @@ void rtas_power_off(void) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_POWER_OFF); /* allow power on only with power button press */ - printk("RTAS power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + pr_emerg("power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); for (;;); } @@ -943,8 +944,8 @@ void __noreturn rtas_halt(void) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_HALT); /* allow power on only with power button press */ - printk("RTAS power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + pr_emerg("power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); for (;;); } @@ -978,7 +979,7 @@ void rtas_os_term(char *str) } while (rtas_busy_delay_time(status)); if (status != 0) - printk(KERN_EMERG "ibm,os-term call failed %d\n", status); + pr_emerg("ibm,os-term call failed %d\n", status); } /** -- GitLab From 98c738c8cee6e5a58d4060862e2f8cf3cdc8a328 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 18 Nov 2022 09:07:47 -0600 Subject: [PATCH 546/694] powerpc/rtas: mandate RTAS syscall filtering CONFIG_PPC_RTAS_FILTER has been optional but default-enabled since its 
introduction. It's been enabled in enterprise distro kernels for a while without causing ABI breakage that wasn't easily fixed, and it prevents harmful abuses of the rtas syscall. Let's make it unconditional. Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221118150751.469393-10-nathanl@linux.ibm.com --- arch/powerpc/Kconfig | 13 ------------- arch/powerpc/kernel/rtas.c | 16 ---------------- 2 files changed, 29 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e21d6de797d6e..65952f62ea4be 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1044,19 +1044,6 @@ config PPC_SECVAR_SYSFS read/write operations on these variables. Say Y if you have secure boot enabled and want to expose variables to userspace. -config PPC_RTAS_FILTER - bool "Enable filtering of RTAS syscalls" - default y - depends on PPC_RTAS - help - The RTAS syscall API has security issues that could be used to - compromise system integrity. This option enforces restrictions on the - RTAS calls and arguments passed by userspace programs to mitigate - these issues. - - Say Y unless you know what you are doing and the filter is causing - problems for you. - endmenu config ISA_DMA_API diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 10c19228aaa37..deded51a79784 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1050,8 +1050,6 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log return NULL; } -#ifdef CONFIG_PPC_RTAS_FILTER - /* * The sys_rtas syscall, as originally designed, allows root to pass * arbitrary physical addresses to RTAS calls. A number of RTAS calls @@ -1200,20 +1198,6 @@ static void __init rtas_syscall_filter_init(void) rtas_filters[i].token = rtas_token(rtas_filters[i].name); } -#else - -static bool block_rtas_call(int token, int nargs, - struct rtas_args *args) -{ - return false; -} - -static void __init rtas_syscall_filter_init(void) -{ -} - -#endif /* CONFIG_PPC_RTAS_FILTER */ - /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { -- GitLab From 64fdcbcc064966bbf261bb455876dffa58858d32 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 Dec 2022 09:43:15 +1100 Subject: [PATCH 547/694] powerpc/prom: Fix 32-bit build Add an IS_ENABLED() check to fix the build error: arch/powerpc/kernel/prom.o: in function `early_init_dt_scan_cpus': prom.c:(.init.text+0x2ea): undefined reference to `boot_cpu_node_count' Fixes: e13d23a404f2 ("powerpc: export the CPU node count") Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 645f4450dfc38..4f1c920aa13ed 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -336,7 +336,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; - boot_cpu_node_count++; + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_node_count++; /* Get physical cpuid */ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); -- GitLab From bd328def2f987ebd4e20725a490f005556d737bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 26 Nov 2022 06:09:59 +0100 Subject: [PATCH 548/694] firmware_loader: remove #include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit utsrelease.h is potentially generated on each build. By removing this unused include we can get rid of some spurious recompilations. Signed-off-by: Thomas Weißschuh Reviewed-by: Russ Weight Signed-off-by: Masahiro Yamada --- drivers/base/firmware_loader/firmware.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index fe77e91c38a21..bf549d6500d7b 100644 --- a/drivers/base/firmware_loader/firmware.h +++ b/drivers/base/firmware_loader/firmware.h @@ -9,8 +9,6 @@ #include #include -#include - /** * enum fw_opt - options to control firmware loading behaviour * -- GitLab From 9edb4fd3d70a9dffd8ac6af6d060e97672b4a22f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 26 Nov 2022 06:10:01 +0100 Subject: [PATCH 549/694] init/version.c: remove #include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2df8220cc511 ("kbuild: build init/built-in.a just once") moved the usage of the define UTS_RELEASE to the file version-timestamp.c. version-timestamp.c in turn is included from version.c but already includes utsrelease.h itself properly. The unneeded include of utsrelease.h from version.c can be dropped. Fixes: 2df8220cc511 ("kbuild: build init/built-in.a just once") Signed-off-by: Thomas Weißschuh Signed-off-by: Masahiro Yamada --- init/version.c | 1 - 1 file changed, 1 deletion(-) diff --git a/init/version.c b/init/version.c index 01d4ab05f0bac..f117921811b45 100644 --- a/init/version.c +++ b/init/version.c @@ -15,7 +15,6 @@ #include #include #include -#include #include static int __init early_hostname(char *arg) -- GitLab From 4bf73588165ba7d32131a043775557a54b6e1db5 Mon Sep 17 00:00:00 2001 From: Dmitry Goncharov Date: Mon, 5 Dec 2022 16:48:19 -0500 Subject: [PATCH 550/694] kbuild: Port silent mode detection to future gnu make. Port silent mode detection to the future (post make-4.4) versions of gnu make. Makefile contains the following piece of make code to detect if option -s is specified on the command line. ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),) This code is executed by make at parse time and assumes that MAKEFLAGS does not contain command line variable definitions. Currently if the user defines a=s on the command line, then at build only time MAKEFLAGS contains " -- a=s". However, starting with commit dc2d963989b96161472b2cd38cef5d1f4851ea34 MAKEFLAGS contains command line definitions at both parse time and build time. This '-s' detection code then confuses a command line variable definition which contains letter 's' with option -s. $ # old make $ make net/wireless/ocb.o a=s CALL scripts/checksyscalls.sh DESCEND objtool $ # this a new make which defines makeflags at parse time $ ~/src/gmake/make/l64/make net/wireless/ocb.o a=s $ We can see here that the letter 's' from 'a=s' was confused with -s. This patch checks for presence of -s using a method recommended by the make manual here https://www.gnu.org/software/make/manual/make.html#Testing-Flags. 
Link: https://lists.gnu.org/archive/html/bug-make/2022-11/msg00190.html Reported-by: Jan Palus Signed-off-by: Dmitry Goncharov Signed-off-by: Masahiro Yamada --- Makefile | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 6f846b1f2618f..fbd9ff4a61e70 100644 --- a/Makefile +++ b/Makefile @@ -93,10 +93,17 @@ endif # If the user is running make -s (silent mode), suppress echoing of # commands +# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS. -ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),) - quiet=silent_ - KBUILD_VERBOSE = 0 +ifeq ($(filter 3.%,$(MAKE_VERSION)),) +silence:=$(findstring s,$(firstword -$(MAKEFLAGS))) +else +silence:=$(findstring s,$(filter-out --%,$(MAKEFLAGS))) +endif + +ifeq ($(silence),s) +quiet=silent_ +KBUILD_VERBOSE = 0 endif export quiet Q KBUILD_VERBOSE -- GitLab From 5fb733d7bd6949e90028efdce8bd528c6ab7cf1e Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Wed, 23 Nov 2022 09:48:05 +0800 Subject: [PATCH 551/694] rtc: st-lpc: Add missing clk_disable_unprepare in st_rtc_probe() The clk_disable_unprepare() should be called in the error handling of clk_get_rate(), fix it. Fixes: b5b2bdfc2893 ("rtc: st: Add new driver for ST's LPC RTC") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221123014805.1993052-1-cuigaosheng1@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-st-lpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c index bdb20f63254e2..0f8e4231098ef 100644 --- a/drivers/rtc/rtc-st-lpc.c +++ b/drivers/rtc/rtc-st-lpc.c @@ -238,6 +238,7 @@ static int st_rtc_probe(struct platform_device *pdev) rtc->clkrate = clk_get_rate(rtc->clk); if (!rtc->clkrate) { + clk_disable_unprepare(rtc->clk); dev_err(&pdev->dev, "Unable to fetch clock rate\n"); return -EINVAL; } -- GitLab From 90cd5c88830140c9fade92a8027e0fb2c6e4cc49 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Wed, 23 Nov 2022 09:59:53 +0800 Subject: [PATCH 552/694] rtc: pic32: Move devm_rtc_allocate_device earlier in pic32_rtc_probe() The pic32_rtc_enable(pdata, 0) and clk_disable_unprepare(pdata->clk) should be called in the error handling of devm_rtc_allocate_device(), so we should move devm_rtc_allocate_device earlier in pic32_rtc_probe() to fix it. 
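The underlying rule, as a sketch: claim managed (devm_*) resources that need no manual unwind before enabling hardware, so that an early error return cannot leave the clock running.

    pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
    if (IS_ERR(pdata->rtc))
        return PTR_ERR(pdata->rtc);     /* nothing enabled yet, nothing to undo */

    clk_prepare_enable(pdata->clk);     /* later errors must disable this again */
    pic32_rtc_enable(pdata, 1);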
Fixes: 6515e23b9fde ("rtc: pic32: convert to devm_rtc_allocate_device") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221123015953.1998521-1-cuigaosheng1@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pic32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c index 7fb9145c43bd5..fa351ac201587 100644 --- a/drivers/rtc/rtc-pic32.c +++ b/drivers/rtc/rtc-pic32.c @@ -324,16 +324,16 @@ static int pic32_rtc_probe(struct platform_device *pdev) spin_lock_init(&pdata->alarm_lock); + pdata->rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(pdata->rtc)) + return PTR_ERR(pdata->rtc); + clk_prepare_enable(pdata->clk); pic32_rtc_enable(pdata, 1); device_init_wakeup(&pdev->dev, 1); - pdata->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(pdata->rtc)) - return PTR_ERR(pdata->rtc); - pdata->rtc->ops = &pic32_rtcops; pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; pdata->rtc->range_max = RTC_TIMESTAMP_END_2099; -- GitLab From 800b55b4dc62c4348fbc1f7570a8ac8be3f0eb66 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 18 Nov 2022 15:33:32 +0100 Subject: [PATCH 553/694] dt-bindings: rtc: convert rtc-meson.txt to dt-schema Convert the Amlogic Meson6 RTC bindings to dt-schema. Signed-off-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221117-b4-amlogic-bindings-convert-v1-6-3f025599b968@linaro.org Signed-off-by: Alexandre Belloni --- .../bindings/rtc/amlogic,meson6-rtc.yaml | 62 +++++++++++++++++++ .../devicetree/bindings/rtc/rtc-meson.txt | 35 ----------- 2 files changed, 62 insertions(+), 35 deletions(-) create mode 100644 Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml delete mode 100644 Documentation/devicetree/bindings/rtc/rtc-meson.txt diff --git a/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml b/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml new file mode 100644 index 0000000000000..8bf7d3a9be98c --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/amlogic,meson6-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic Meson6, Meson8, Meson8b and Meson8m2 RTC + +maintainers: + - Neil Armstrong + - Martin Blumenstingl + +allOf: + - $ref: rtc.yaml# + - $ref: /schemas/nvmem/nvmem.yaml# + +properties: + compatible: + enum: + - amlogic,meson6-rtc + - amlogic,meson8-rtc + - amlogic,meson8b-rtc + - amlogic,meson8m2-rtc + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + + resets: + maxItems: 1 + + vdd-supply: true + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + #include + rtc: rtc@740 { + compatible = "amlogic,meson6-rtc"; + reg = <0x740 0x14>; + interrupts = ; + clocks = <&rtc32k_xtal>; + vdd-supply = <&rtc_vdd>; + resets = <&reset_rtc>; + #address-cells = <1>; + #size-cells = <1>; + + mac@0 { + reg = <0 6>; + }; + }; diff --git a/Documentation/devicetree/bindings/rtc/rtc-meson.txt b/Documentation/devicetree/bindings/rtc/rtc-meson.txt deleted file mode 100644 index e921fe66a3622..0000000000000 --- a/Documentation/devicetree/bindings/rtc/rtc-meson.txt +++ /dev/null @@ -1,35 +0,0 @@ -* Amlogic Meson6, Meson8, Meson8b and Meson8m2 RTC - -Required properties: -- compatible: should be one of the following 
describing the hardware: - * "amlogic,meson6-rtc" - * "amlogic,meson8-rtc" - * "amlogic,meson8b-rtc" - * "amlogic,meson8m2-rtc" - -- reg: physical register space for the controller's memory mapped registers. -- interrupts: the interrupt line of the RTC block. -- clocks: reference to the external 32.768kHz crystal oscillator. -- vdd-supply: reference to the power supply of the RTC block. -- resets: reset controller reference to allow reset of the controller - -Optional properties for the battery-backed non-volatile memory: -- #address-cells: should be 1 to address the battery-backed non-volatile memory -- #size-cells: should be 1 to reference the battery-backed non-volatile memory - -Optional child nodes: -- see ../nvmem/nvmem.txt - -Example: - - rtc: rtc@740 { - compatible = "amlogic,meson6-rtc"; - reg = <0x740 0x14>; - interrupts = ; - clocks = <&rtc32k_xtal>; - vdd-supply = <&rtc_vdd>; - resets = <&reset RESET_RTC>; - - #address-cells = <1>; - #size-cells = <1>; - }; -- GitLab From efa80b028c7a9c74fd875517aa0fc9fd8d610ed0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Dec 2022 13:07:30 +0900 Subject: [PATCH 554/694] kbuild: move -Werror from KBUILD_CFLAGS to KBUILD_CPPFLAGS CONFIG_WERROR turns warnings into errors, which happens only for *.c files because -Werror is added to KBUILD_CFLAGS. Adding it to KBUILD_CPPFLAGS makes more sense because preprocessors understand the -Werror option. For example, you can put a #warning directive in any preprocessed code. warning: #warning "this is a warning message" [-Wcpp] If -Werror is added, it is promoted to an error. error: #warning "this is a warning message" [-Werror=cpp] This commit moves -Werror to KBUILD_CPPFLAGS so it works in the same way for *.c, *.S, *.lds.S or whatever needs preprocessing. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fbd9ff4a61e70..6b047daa46cc1 100644 --- a/Makefile +++ b/Makefile @@ -866,7 +866,8 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong KBUILD_CFLAGS += $(stackp-flags-y) -KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror +KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror +KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y) KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds KBUILD_RUSTFLAGS-$(CONFIG_WERROR) += -Dwarnings -- GitLab From 80b6093b55e31c2c40ff082fb32523d4e852954f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Dec 2022 13:07:31 +0900 Subject: [PATCH 555/694] kbuild: add -Wundef to KBUILD_CPPFLAGS for W=1 builds The use of an undefined macro in an #if directive is warned, but only in *.c files. No warning from other files such as *.S, *.lds.S. Since -Wundef is a preprocessor-related warning, it should be added to KBUILD_CPPFLAGS instead of KBUILD_CFLAGS. My previous attempt [1] uncovered several issues. I could not finish fixing them all. This commit adds -Wundef to KBUILD_CPPFLAGS for W=1 builds in order to block new breakages. (The kbuild test robot tests with W=1) We can fix the warnings one by one. After fixing all of them, we can make it default in the top Makefile, and remove -Wundef from KBUILD_CFLAGS. 
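What -Wundef catches, shown with a placeholder macro name (CONFIG_FOO is purely illustrative):

    #if CONFIG_FOO      /* -Wundef: "CONFIG_FOO" is not defined, evaluates to 0 */
    #endif

    #ifdef CONFIG_FOO   /* not diagnosed: #ifdef only tests whether it is defined */
    #endif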
[1]: https://lore.kernel.org/all/20221012180118.331005-2-masahiroy@kernel.org/ Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Reviewed-by: Nick Desaulniers --- scripts/Makefile.extrawarn | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6bbba36c59695..40cd13eca82e8 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -38,6 +38,7 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value +KBUILD_CPPFLAGS += -Wundef KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 else -- GitLab From ac3a2585f018f10039b4a856dcb122da88c1c1c9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 11 Dec 2022 06:19:33 -0500 Subject: [PATCH 556/694] nfsd: rework refcounting in filecache The filecache refcounting is a bit non-standard for something searchable by RCU, in that we maintain a sentinel reference while it's hashed. This in turn requires that we have to do things differently in the "put" depending on whether its hashed, which we believe to have led to races. There are other problems in here too. nfsd_file_close_inode_sync can end up freeing an nfsd_file while there are still outstanding references to it, and there are a number of subtle ToC/ToU races. Rework the code so that the refcount is what drives the lifecycle. When the refcount goes to zero, then unhash and rcu free the object. A task searching for a nfsd_file is allowed to bump its refcount, but only if it's not already 0. Ensure that we don't make any other changes to it until a reference is held. With this change, the LRU carries a reference. Take special care to deal with it when removing an entry from the list, and ensure that we only repurpose the nf_lru list_head when the refcount is 0 to ensure exclusive access to it. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 318 +++++++++++++++++++++++--------------------- fs/nfsd/trace.h | 51 +++---- 2 files changed, 189 insertions(+), 180 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1998b4d5f692a..45b2c9e3f6360 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -324,8 +324,7 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) if (key->gc) __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; - /* nf_ref is pre-incremented for hash table */ - refcount_set(&nf->nf_ref, 2); + refcount_set(&nf->nf_ref, 1); nf->nf_may = key->need; nf->nf_mark = NULL; } @@ -377,24 +376,35 @@ nfsd_file_unhash(struct nfsd_file *nf) return false; } -static bool +static void nfsd_file_free(struct nfsd_file *nf) { s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); - bool flush = false; trace_nfsd_file_free(nf); this_cpu_inc(nfsd_file_releases); this_cpu_add(nfsd_file_total_age, age); + nfsd_file_unhash(nf); + + /* + * We call fsync here in order to catch writeback errors. It's not + * strictly required by the protocol, but an nfsd_file could get + * evicted from the cache before a COMMIT comes in. If another + * task were to open that file in the interim and scrape the error, + * then the client may never see it. By calling fsync here, we ensure + * that writeback happens before the entry is freed, and that any + * errors reported result in the write verifier changing. 
+ */ + nfsd_file_fsync(nf); + if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { get_file(nf->nf_file); filp_close(nf->nf_file, NULL); fput(nf->nf_file); - flush = true; } /* @@ -402,10 +412,9 @@ nfsd_file_free(struct nfsd_file *nf) * WARN and leak it to preserve system stability. */ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) - return flush; + return; call_rcu(&nf->nf_rcu, nfsd_file_slab_free); - return flush; } static bool @@ -421,17 +430,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static void nfsd_file_lru_add(struct nfsd_file *nf) +static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_add(nf); + return true; + } + return false; } -static void nfsd_file_lru_remove(struct nfsd_file *nf) +static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_del(nf); + return true; + } + return false; } struct nfsd_file * @@ -442,86 +457,60 @@ nfsd_file_get(struct nfsd_file *nf) return NULL; } -static void -nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) -{ - trace_nfsd_file_unhash_and_queue(nf); - if (nfsd_file_unhash(nf)) { - /* caller must call nfsd_file_dispose_list() later */ - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, dispose); - } -} - -static void -nfsd_file_put_noref(struct nfsd_file *nf) -{ - trace_nfsd_file_put(nf); - - if (refcount_dec_and_test(&nf->nf_ref)) { - WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); - nfsd_file_lru_remove(nf); - nfsd_file_free(nf); - } -} - -static void -nfsd_file_unhash_and_put(struct nfsd_file *nf) -{ - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); -} - +/** + * nfsd_file_put - put the reference to a nfsd_file + * @nf: nfsd_file of which to put the reference + * + * Put a reference to a nfsd_file. In the non-GC case, we just put the + * reference immediately. In the GC case, if the reference would be + * the last one, the put it on the LRU instead to be cleaned up later. + */ void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); + trace_nfsd_file_put(nf); - if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) - nfsd_file_lru_add(nf); - else if (refcount_read(&nf->nf_ref) == 2) - nfsd_file_unhash_and_put(nf); - - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); - } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { - nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); - } else - nfsd_file_put_noref(nf); -} - -static void -nfsd_file_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && + test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + /* + * If this is the last reference (nf_ref == 1), then try to + * transfer it to the LRU. + */ + if (refcount_dec_not_one(&nf->nf_ref)) + return; + + /* Try to add it to the LRU. If that fails, decrement. 
*/ + if (nfsd_file_lru_add(nf)) { + /* If it's still hashed, we're done */ + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_schedule_laundrette(); + return; + } - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); + /* + * We're racing with unhashing, so try to remove it from + * the LRU. If removal fails, then someone else already + * has our reference. + */ + if (!nfsd_file_lru_remove(nf)) + return; + } } + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); } static void -nfsd_file_dispose_list_sync(struct list_head *dispose) +nfsd_file_dispose_list(struct list_head *dispose) { - bool flush = false; struct nfsd_file *nf; - while(!list_empty(dispose)) { + while (!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - if (!refcount_dec_and_test(&nf->nf_ref)) - continue; - if (nfsd_file_free(nf)) - flush = true; + nfsd_file_free(nf); } - if (flush) - flush_delayed_fput(); } static void @@ -591,21 +580,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); - /* - * Do a lockless refcount check. The hashtable holds one reference, so - * we look to see if anything else has a reference, or if any have - * been put since the shrinker last ran. Those don't get unhashed and - * released. - * - * Note that in the put path, we set the flag and then decrement the - * counter. Here we check the counter and then test and clear the flag. - * That order is deliberate to ensure that we can do this locklessly. - */ - if (refcount_read(&nf->nf_ref) > 1) { - list_lru_isolate(lru, &nf->nf_lru); - trace_nfsd_file_gc_in_use(nf); - return LRU_REMOVED; - } + /* We should only be dealing with GC entries here */ + WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); /* * Don't throw out files that are still undergoing I/O or @@ -616,40 +592,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_SKIP; } + /* If it was recently added to the list, skip it */ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { trace_nfsd_file_gc_referenced(nf); return LRU_ROTATE; } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_gc_hashed(nf); - return LRU_SKIP; + /* + * Put the reference held on behalf of the LRU. If it wasn't the last + * one, then just remove it from the LRU and ignore it. + */ + if (!refcount_dec_and_test(&nf->nf_ref)) { + trace_nfsd_file_gc_in_use(nf); + list_lru_isolate(lru, &nf->nf_lru); + return LRU_REMOVED; } + /* Refcount went to zero. Unhash it and queue it to the dispose list */ + nfsd_file_unhash(nf); list_lru_isolate_move(lru, &nf->nf_lru, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; } -/* - * Unhash items on @dispose immediately, then queue them on the - * disposal workqueue to finish releasing them in the background. - * - * cel: Note that between the time list_lru_shrink_walk runs and - * now, these items are in the hash table but marked unhashed. - * Why release these outside of lru_cb ? There's no lock ordering - * problem since lru_cb currently takes no lock. 
- */ -static void nfsd_file_gc_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; - - list_for_each_entry(nf, dispose, nf_lru) - nfsd_file_hash_remove(nf); - nfsd_file_dispose_list_delayed(dispose); -} - static void nfsd_file_gc(void) { @@ -659,7 +625,7 @@ nfsd_file_gc(void) ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &dispose, list_lru_count(&nfsd_file_lru)); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } static void @@ -685,7 +651,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); return ret; } @@ -695,72 +661,111 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -/* - * Find all cache items across all net namespaces that match @inode and - * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). +/** + * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode + * @inode: inode on which to close out nfsd_files + * @dispose: list on which to gather nfsd_files to close out + * + * An nfsd_file represents a struct file being held open on behalf of nfsd. An + * open file however can block other activity (such as leases), or cause + * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). + * + * This function is intended to find open nfsd_files when this sort of + * conflicting access occurs and then attempt to close those files out. + * + * Populates the dispose list with entries that have already had their + * refcounts go to zero. The actual free of an nfsd_file can be expensive, + * so we leave it up to the caller whether it wants to wait or not. */ -static unsigned int -__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) +static void +nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, }; - unsigned int count = 0; struct nfsd_file *nf; rcu_read_lock(); do { + int decrement = 1; + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - nfsd_file_unhash_and_queue(nf, dispose); - count++; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + continue; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + continue; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } } while (1); rcu_read_unlock(); - return count; } /** - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put, then flush and fput all cache items associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * actual freeing is deferred to the dispose_list_delayed infrastructure. + * + * This is used by the fsnotify callbacks and setlease notifier. 
*/ -void -nfsd_file_close_inode_sync(struct inode *inode) +static void +nfsd_file_close_inode(struct inode *inode) { LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode_sync(inode, count); - nfsd_file_dispose_list_sync(&dispose); + nfsd_file_queue_for_close(inode, &dispose); + nfsd_file_dispose_list_delayed(&dispose); } /** - * nfsd_file_close_inode - attempt a delayed close of a nfsd_file + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put all cache item associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * nfsd_files are closed out synchronously. + * + * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames + * when reexporting NFS. */ -static void -nfsd_file_close_inode(struct inode *inode) +void +nfsd_file_close_inode_sync(struct inode *inode) { + struct nfsd_file *nf; LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode(inode, count); - nfsd_file_dispose_list_delayed(&dispose); + trace_nfsd_file_close(inode); + + nfsd_file_queue_for_close(inode, &dispose); + while (!list_empty(&dispose)) { + nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_free(nf); + } + flush_delayed_fput(); } /** * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and close any entries that have not been used since + * Walk the LRU list and destroy any entries that have not been used since * the last scan. */ static void @@ -782,7 +787,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, /* Only close files for F_SETLEASE leases */ if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode_sync(file_inode(fl->fl_file)); + nfsd_file_close_inode(file_inode(fl->fl_file)); return 0; } @@ -903,6 +908,13 @@ nfsd_file_cache_init(void) goto out; } +/** + * __nfsd_file_cache_purge: clean out the cache for shutdown + * @net: net-namespace to shut down the cache (may be NULL) + * + * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, + * then close out everything. Called when an nfsd instance is being shut down. 
+ */ static void __nfsd_file_cache_purge(struct net *net) { @@ -916,8 +928,11 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) - nfsd_file_unhash_and_queue(nf, &dispose); + if (!net || nf->nf_net == net) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + list_add(&nf->nf_lru, &dispose); + } nf = rhashtable_walk_next(&iter); } @@ -1084,8 +1099,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, if (nf) nf = nfsd_file_get(nf); rcu_read_unlock(); - if (nf) + + if (nf) { + if (nfsd_file_lru_remove(nf)) + WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; + } nf = nfsd_file_alloc(&key, may_flags); if (!nf) { @@ -1118,11 +1137,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } open_retry = false; - nfsd_file_put_noref(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); goto retry; } - nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); @@ -1132,7 +1151,8 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { - nfsd_file_put(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); nf = NULL; } @@ -1158,8 +1178,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || key.inode->i_nlink == 0) - nfsd_file_unhash_and_put(nf); + if (status == nfs_ok && key.inode->i_nlink == 0) + status = nfserr_jukebox; + if (status != nfs_ok) + nfsd_file_unhash(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 46b8f68a24974..c852ae8eaf371 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -876,8 +876,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ - { 1 << NFSD_FILE_GC, "GC"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), @@ -912,6 +912,7 @@ DEFINE_EVENT(nfsd_file_class, name, \ DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); +DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, @@ -1103,35 +1104,6 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) -DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO( - const struct inode *inode, - unsigned int count - ), - TP_ARGS(inode, count), - TP_STRUCT__entry( - __field(const struct inode *, inode) - __field(unsigned int, count) - ), - TP_fast_assign( - __entry->inode = inode; - __entry->count = count; - ), - TP_printk("inode=%p count=%u", - __entry->inode, __entry->count) -); - -#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ -DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO( \ - const struct inode *inode, \ - unsigned int count \ - ), \ - TP_ARGS(inode, count)) - -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); - TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, @@ -1209,7 +1181,6 @@ 
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, @@ -1241,6 +1212,22 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); +TRACE_EVENT(nfsd_file_close, + TP_PROTO( + const struct inode *inode + ), + TP_ARGS(inode), + TP_STRUCT__entry( + __field(const void *, inode) + ), + TP_fast_assign( + __entry->inode = inode; + ), + TP_printk("inode=%p", + __entry->inode + ) +); + TRACE_EVENT(nfsd_file_fsync, TP_PROTO( const struct nfsd_file *nf, -- GitLab From 21b8a1dd56a163825e5749b303858fb902ebf198 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Dec 2022 10:45:30 -0800 Subject: [PATCH 557/694] rtc: msc313: Fix function prototype mismatch in msc313_rtc_probe() With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), indirect call targets are validated against the expected function pointer prototype to make sure the call target is valid to help mitigate ROP attacks. If they are not identical, there is a failure at run time, which manifests as either a kernel panic or thread getting killed. msc313_rtc_probe() was passing clk_disable_unprepare() directly, which did not have matching prototypes for devm_add_action_or_reset()'s callback argument. Refactor to use devm_clk_get_enabled() instead. This was found as a result of Clang's new -Wcast-function-type-strict flag, which is more sensitive than the simpler -Wcast-function-type, which only checks for type width mismatches. Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202211041527.HD8TLSE1-lkp@intel.com Suggested-by: Christophe JAILLET Cc: Daniel Palmer Cc: Romain Perier Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rtc@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Daniel Palmer Tested-by: Daniel Palmer Link: https://lore.kernel.org/r/20221202184525.gonna.423-kees@kernel.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-msc313.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/rtc/rtc-msc313.c b/drivers/rtc/rtc-msc313.c index f3fde013c4b8b..8d7737e0e2e02 100644 --- a/drivers/rtc/rtc-msc313.c +++ b/drivers/rtc/rtc-msc313.c @@ -212,22 +212,12 @@ static int msc313_rtc_probe(struct platform_device *pdev) return ret; } - clk = devm_clk_get(dev, NULL); + clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(clk)) { dev_err(dev, "No input reference clock\n"); return PTR_ERR(clk); } - ret = clk_prepare_enable(clk); - if (ret) { - dev_err(dev, "Failed to enable the reference clock, %d\n", ret); - return ret; - } - - ret = devm_add_action_or_reset(dev, (void (*) (void *))clk_disable_unprepare, clk); - if (ret) - return ret; - rate = clk_get_rate(clk); writew(rate & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_L); writew((rate >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_H); -- GitLab From 2e830ccc21eb67a4c2490279d907e5e9199e5156 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 20 Oct 2022 22:42:41 +0200 Subject: [PATCH 558/694] rtc: rk808: reduce 'struct rk808' usage Reduce usage of 'struct rk808' (driver data of the parent MFD), so that only the chip variant field is still being accessed directly. 
This allows restructuring the MFD driver to support SPI based PMICs. Acked-by: Alexandre Belloni Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20221020204251.108565-4-sebastian.reichel@collabora.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rk808.c | 47 ++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c index e920da8c08da1..2d9bcb3ce1e3b 100644 --- a/drivers/rtc/rtc-rk808.c +++ b/drivers/rtc/rtc-rk808.c @@ -14,7 +14,6 @@ #include #include #include -#include /* RTC_CTRL_REG bitfields */ #define BIT_RTC_CTRL_REG_STOP_RTC_M BIT(0) @@ -51,7 +50,7 @@ struct rk_rtc_compat_reg { }; struct rk808_rtc { - struct rk808 *rk808; + struct regmap *regmap; struct rtc_device *rtc; struct rk_rtc_compat_reg *creg; int irq; @@ -97,12 +96,11 @@ static void gregorian_to_rockchip(struct rtc_time *tm) static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) { struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev); - struct rk808 *rk808 = rk808_rtc->rk808; u8 rtc_data[NUM_TIME_REGS]; int ret; /* Force an update of the shadowed registers right now */ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_RTC_GET_TIME, BIT_RTC_CTRL_REG_RTC_GET_TIME); if (ret) { @@ -116,7 +114,7 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) * 32khz. If we clear the GET_TIME bit here, the time of i2c transfer * certainly more than 31.25us: 16 * 2.5us at 400kHz bus frequency. */ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_RTC_GET_TIME, 0); if (ret) { @@ -124,7 +122,7 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) return ret; } - ret = regmap_bulk_read(rk808->regmap, rk808_rtc->creg->seconds_reg, + ret = regmap_bulk_read(rk808_rtc->regmap, rk808_rtc->creg->seconds_reg, rtc_data, NUM_TIME_REGS); if (ret) { dev_err(dev, "Failed to bulk read rtc_data: %d\n", ret); @@ -148,7 +146,6 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev); - struct rk808 *rk808 = rk808_rtc->rk808; u8 rtc_data[NUM_TIME_REGS]; int ret; @@ -163,7 +160,7 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm) rtc_data[6] = bin2bcd(tm->tm_wday); /* Stop RTC while updating the RTC registers */ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_STOP_RTC_M, BIT_RTC_CTRL_REG_STOP_RTC_M); if (ret) { @@ -171,14 +168,14 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm) return ret; } - ret = regmap_bulk_write(rk808->regmap, rk808_rtc->creg->seconds_reg, + ret = regmap_bulk_write(rk808_rtc->regmap, rk808_rtc->creg->seconds_reg, rtc_data, NUM_TIME_REGS); if (ret) { dev_err(dev, "Failed to bull write rtc_data: %d\n", ret); return ret; } /* Start RTC again */ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_STOP_RTC_M, 0); if (ret) { dev_err(dev, "Failed to update RTC control: %d\n", ret); @@ -191,12 +188,11 @@ static int rk808_rtc_set_time(struct device *dev, struct 
rtc_time *tm) static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) { struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev); - struct rk808 *rk808 = rk808_rtc->rk808; u8 alrm_data[NUM_ALARM_REGS]; uint32_t int_reg; int ret; - ret = regmap_bulk_read(rk808->regmap, + ret = regmap_bulk_read(rk808_rtc->regmap, rk808_rtc->creg->alarm_seconds_reg, alrm_data, NUM_ALARM_REGS); if (ret) { @@ -212,7 +208,7 @@ static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) alrm->time.tm_year = (bcd2bin(alrm_data[5] & YEARS_REG_MSK)) + 100; rockchip_to_gregorian(&alrm->time); - ret = regmap_read(rk808->regmap, rk808_rtc->creg->int_reg, &int_reg); + ret = regmap_read(rk808_rtc->regmap, rk808_rtc->creg->int_reg, &int_reg); if (ret) { dev_err(dev, "Failed to read RTC INT REG: %d\n", ret); return ret; @@ -228,10 +224,9 @@ static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) static int rk808_rtc_stop_alarm(struct rk808_rtc *rk808_rtc) { - struct rk808 *rk808 = rk808_rtc->rk808; int ret; - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->int_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->int_reg, BIT_RTC_INTERRUPTS_REG_IT_ALARM_M, 0); return ret; @@ -239,10 +234,9 @@ static int rk808_rtc_stop_alarm(struct rk808_rtc *rk808_rtc) static int rk808_rtc_start_alarm(struct rk808_rtc *rk808_rtc) { - struct rk808 *rk808 = rk808_rtc->rk808; int ret; - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->int_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->int_reg, BIT_RTC_INTERRUPTS_REG_IT_ALARM_M, BIT_RTC_INTERRUPTS_REG_IT_ALARM_M); @@ -252,7 +246,6 @@ static int rk808_rtc_start_alarm(struct rk808_rtc *rk808_rtc) static int rk808_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) { struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev); - struct rk808 *rk808 = rk808_rtc->rk808; u8 alrm_data[NUM_ALARM_REGS]; int ret; @@ -272,7 +265,7 @@ static int rk808_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) alrm_data[4] = bin2bcd(alrm->time.tm_mon + 1); alrm_data[5] = bin2bcd(alrm->time.tm_year - 100); - ret = regmap_bulk_write(rk808->regmap, + ret = regmap_bulk_write(rk808_rtc->regmap, rk808_rtc->creg->alarm_seconds_reg, alrm_data, NUM_ALARM_REGS); if (ret) { @@ -313,20 +306,18 @@ static int rk808_rtc_alarm_irq_enable(struct device *dev, static irqreturn_t rk808_alarm_irq(int irq, void *data) { struct rk808_rtc *rk808_rtc = data; - struct rk808 *rk808 = rk808_rtc->rk808; - struct i2c_client *client = rk808->i2c; int ret; - ret = regmap_write(rk808->regmap, rk808_rtc->creg->status_reg, + ret = regmap_write(rk808_rtc->regmap, rk808_rtc->creg->status_reg, RTC_STATUS_MASK); if (ret) { - dev_err(&client->dev, + dev_err(&rk808_rtc->rtc->dev, "%s:Failed to update RTC status: %d\n", __func__, ret); return ret; } rtc_update_irq(rk808_rtc->rtc, 1, RTC_IRQF | RTC_AF); - dev_dbg(&client->dev, + dev_dbg(&rk808_rtc->rtc->dev, "%s:irq=%d\n", __func__, irq); return IRQ_HANDLED; } @@ -404,10 +395,12 @@ static int rk808_rtc_probe(struct platform_device *pdev) break; } platform_set_drvdata(pdev, rk808_rtc); - rk808_rtc->rk808 = rk808; + rk808_rtc->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!rk808_rtc->regmap) + return -ENODEV; /* start rtc running by default, and use shadowed timer. 
*/ - ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg, + ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg, BIT_RTC_CTRL_REG_STOP_RTC_M | BIT_RTC_CTRL_REG_RTC_READSEL_M, BIT_RTC_CTRL_REG_RTC_READSEL_M); @@ -417,7 +410,7 @@ static int rk808_rtc_probe(struct platform_device *pdev) return ret; } - ret = regmap_write(rk808->regmap, rk808_rtc->creg->status_reg, + ret = regmap_write(rk808_rtc->regmap, rk808_rtc->creg->status_reg, RTC_STATUS_MASK); if (ret) { dev_err(&pdev->dev, -- GitLab From 103c14db61a24cc0cd344dc5d93d264a36687c35 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 11 Dec 2022 22:57:55 +0100 Subject: [PATCH 559/694] rtc: rx6110: fix warning with !OF rx6110_spi_of_match is not used when !OF, leading to a warning: >> drivers/rtc/rtc-rx6110.c:384:34: warning: 'rx6110_spi_of_match' defined but not used [-Wunused-const-variable=] 384 | static const struct of_device_id rx6110_spi_of_match[] = { | ^~~~~~~~~~~~~~~~~~~ Reported-by: kernel test robot Link: https://lore.kernel.org/r/20221211215756.54002-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rx6110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c index cc634558b9280..76a49838014ba 100644 --- a/drivers/rtc/rtc-rx6110.c +++ b/drivers/rtc/rtc-rx6110.c @@ -376,7 +376,7 @@ static const struct spi_device_id rx6110_spi_id[] = { }; MODULE_DEVICE_TABLE(spi, rx6110_spi_id); -static const struct of_device_id rx6110_spi_of_match[] = { +static const __maybe_unused struct of_device_id rx6110_spi_of_match[] = { { .compatible = "epson,rx6110" }, { }, }; -- GitLab From c2d12e85336f6d4172fb2bab5935027c446d7343 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 11 Dec 2022 23:35:53 +0100 Subject: [PATCH 560/694] rtc: pcf85063: fix pcf85063_clkout_control pcf85063_clkout_control reads the wrong register but then update the correct one. Reported-by: Janne Terho Fixes: 8c229ab6048b ("rtc: pcf85063: Add pcf85063 clkout control to common clock framework") Link: https://lore.kernel.org/r/20221211223553.59955-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf85063.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 99f9cc57c7b30..754e03984f986 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -424,7 +424,7 @@ static int pcf85063_clkout_control(struct clk_hw *hw, bool enable) unsigned int buf; int ret; - ret = regmap_read(pcf85063->regmap, PCF85063_REG_OFFSET, &buf); + ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &buf); if (ret < 0) return ret; buf &= PCF85063_REG_CLKO_F_MASK; -- GitLab From 13959373e9c9021cc80730c7bd1242e07b10b328 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 8 Dec 2022 22:32:25 +1000 Subject: [PATCH 561/694] powerpc/qspinlock: Fix 32-bit build Some 32-bit configurations don't pull in the spin_begin/end/relax definitions. Fix is to restore a lost include. 
Reported-by: kernel test robot Fixes: 84990b169557 ("powerpc/qspinlock: add mcs queueing for contended waiters") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/oe-kbuild-all/202212050224.i7uh9fOh-lkp@intel.com Link: https://lore.kernel.org/r/20221208123225.1566113-1-npiggin@gmail.com --- arch/powerpc/lib/qspinlock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 1cf5d3e752501..e4bd145255d00 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include -- GitLab From 3e39f7971d75cafe1c90dec60526ad45484657c0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 11 Dec 2022 21:51:23 +0100 Subject: [PATCH 562/694] dt-bindings: rtc: m41t80: Convert text schema to YAML one Convert the m41t80 text schema to YAML schema. Add "#clock-cells" requirement, which is required by clock-output-names. Signed-off-by: Marek Vasut Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221211205124.23823-1-marex@denx.de Signed-off-by: Alexandre Belloni --- .../devicetree/bindings/rtc/rtc-m41t80.txt | 39 ---------- .../devicetree/bindings/rtc/st,m41t80.yaml | 73 +++++++++++++++++++ 2 files changed, 73 insertions(+), 39 deletions(-) delete mode 100644 Documentation/devicetree/bindings/rtc/rtc-m41t80.txt create mode 100644 Documentation/devicetree/bindings/rtc/st,m41t80.yaml diff --git a/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt b/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt deleted file mode 100644 index cdd196b1e9bdb..0000000000000 --- a/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt +++ /dev/null @@ -1,39 +0,0 @@ -ST M41T80 family of RTC and compatible - -Required properties: -- compatible: should be one of: - "st,m41t62", - "st,m41t65", - "st,m41t80", - "st,m41t81", - "st,m41t81s", - "st,m41t82", - "st,m41t83", - "st,m41t84", - "st,m41t85", - "st,m41t87", - "microcrystal,rv4162", -- reg: I2C bus address of the device - -Optional properties: -- interrupts: rtc alarm interrupt. -- clock-output-names: From common clock binding to override the default output - clock name -- wakeup-source: Enables wake up of host system on alarm - -Optional child node: -- clock: Provide this if the square wave pin is used as boot-enabled fixed clock. 
- -Example: - rtc@68 { - compatible = "st,m41t80"; - reg = <0x68>; - interrupt-parent = <&UIC0>; - interrupts = <0x9 0x8>; - - clock { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - }; - }; diff --git a/Documentation/devicetree/bindings/rtc/st,m41t80.yaml b/Documentation/devicetree/bindings/rtc/st,m41t80.yaml new file mode 100644 index 0000000000000..fc9c6da6483f5 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/st,m41t80.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/st,m41t80.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ST M41T80 family of RTC and compatible + +maintainers: + - Alexandre Belloni + +properties: + compatible: + enum: + - st,m41t62 + - st,m41t65 + - st,m41t80 + - st,m41t81 + - st,m41t81s + - st,m41t82 + - st,m41t83 + - st,m41t84 + - st,m41t85 + - st,m41t87 + - microcrystal,rv4162 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + "#clock-cells": + const: 1 + + clock-output-names: + maxItems: 1 + description: From common clock binding to override the default output clock name. + + clock: + type: object + $ref: /schemas/clock/fixed-clock.yaml# + properties: + clock-frequency: + const: 32768 + +allOf: + - $ref: rtc.yaml + +unevaluatedProperties: false + +required: + - compatible + - reg + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + rtc@68 { + compatible = "st,m41t80"; + reg = <0x68>; + interrupt-parent = <&UIC0>; + interrupts = <0x9 0x8>; + + clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + }; + }; -- GitLab From 462e768b55a2331324ff72e74706261134369826 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Dec 2022 18:56:09 +0300 Subject: [PATCH 563/694] iommu/mediatek: Fix forever loop in error handling There is a typo so this loop does i++ where i-- was intended. It will result in looping until the kernel crashes. Fixes: 26593928564c ("iommu/mediatek: Add error path for loop of mm_dts_parse") Signed-off-by: Dan Carpenter Reviewed-by: Yong Wu Link: https://lore.kernel.org/r/Y5C3mTam2nkbaz6o@kili Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 392b8c167c44c..e9b3b794811dc 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1159,8 +1159,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m return 0; err_larbdev_put: - /* id may be not linear mapping, loop whole the array */ - for (i = MTK_LARB_NR_MAX - 1; i >= 0; i++) { + for (i = MTK_LARB_NR_MAX - 1; i >= 0; i--) { if (!data->larb_imu[i].dev) continue; put_device(data->larb_imu[i].dev); -- GitLab From 3bc8edc98bd43540dbe648e4ef91f443d6d20a24 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Mon, 12 Dec 2022 13:11:06 +0200 Subject: [PATCH 564/694] nfsd: under NFSv4.1, fix double svc_xprt_put on rpc_create failure On error situation `clp->cl_cb_conn.cb_xprt` should not be given a reference to the xprt otherwise both client cleanup and the error handling path of the caller call to put it. Better to delay handing over the reference to a later branch. [ 72.530665] refcount_t: underflow; use-after-free. 
[ 72.531933] WARNING: CPU: 0 PID: 173 at lib/refcount.c:28 refcount_warn_saturate+0xcf/0x120 [ 72.533075] Modules linked in: nfsd(OE) nfsv4(OE) nfsv3(OE) nfs(OE) lockd(OE) compat_nfs_ssc(OE) nfs_acl(OE) rpcsec_gss_krb5(OE) auth_rpcgss(OE) rpcrdma(OE) dns_resolver fscache netfs grace rdma_cm iw_cm ib_cm sunrpc(OE) mlx5_ib mlx5_core mlxfw pci_hyperv_intf ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nft_counter xt_addrtype nft_compat br_netfilter bridge stp llc nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set overlay nf_tables nfnetlink crct10dif_pclmul crc32_pclmul ghash_clmulni_intel xfs serio_raw virtio_net virtio_blk net_failover failover fuse [last unloaded: sunrpc] [ 72.540389] CPU: 0 PID: 173 Comm: kworker/u16:5 Tainted: G OE 5.15.82-dan #1 [ 72.541511] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.16.0-3.module+el8.7.0+1084+97b81f61 04/01/2014 [ 72.542717] Workqueue: nfsd4_callbacks nfsd4_run_cb_work [nfsd] [ 72.543575] RIP: 0010:refcount_warn_saturate+0xcf/0x120 [ 72.544299] Code: 55 00 0f 0b 5d e9 01 50 98 00 80 3d 75 9e 39 08 00 0f 85 74 ff ff ff 48 c7 c7 e8 d1 60 8e c6 05 61 9e 39 08 01 e8 f6 51 55 00 <0f> 0b 5d e9 d9 4f 98 00 80 3d 4b 9e 39 08 00 0f 85 4c ff ff ff 48 [ 72.546666] RSP: 0018:ffffb3f841157cf0 EFLAGS: 00010286 [ 72.547393] RAX: 0000000000000026 RBX: ffff89ac6231d478 RCX: 0000000000000000 [ 72.548324] RDX: ffff89adb7c2c2c0 RSI: ffff89adb7c205c0 RDI: ffff89adb7c205c0 [ 72.549271] RBP: ffffb3f841157cf0 R08: 0000000000000000 R09: c0000000ffefffff [ 72.550209] R10: 0000000000000001 R11: ffffb3f841157ad0 R12: ffff89ac6231d180 [ 72.551142] R13: ffff89ac6231d478 R14: ffff89ac40c06180 R15: ffff89ac6231d4b0 [ 72.552089] FS: 0000000000000000(0000) GS:ffff89adb7c00000(0000) knlGS:0000000000000000 [ 72.553175] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 72.553934] CR2: 0000563a310506a8 CR3: 0000000109a66000 CR4: 0000000000350ef0 [ 72.554874] Call Trace: [ 72.555278] [ 72.555614] svc_xprt_put+0xaf/0xe0 [sunrpc] [ 72.556276] nfsd4_process_cb_update.isra.11+0xb7/0x410 [nfsd] [ 72.557087] ? update_load_avg+0x82/0x610 [ 72.557652] ? cpuacct_charge+0x60/0x70 [ 72.558212] ? dequeue_entity+0xdb/0x3e0 [ 72.558765] ? queued_spin_unlock+0x9/0x20 [ 72.559358] nfsd4_run_cb_work+0xfc/0x270 [nfsd] [ 72.560031] process_one_work+0x1df/0x390 [ 72.560600] worker_thread+0x37/0x3b0 [ 72.561644] ? process_one_work+0x390/0x390 [ 72.562247] kthread+0x12f/0x150 [ 72.562710] ? set_kthread_struct+0x50/0x50 [ 72.563309] ret_from_fork+0x22/0x30 [ 72.563818] [ 72.564189] ---[ end trace 031117b1c72ec616 ]--- [ 72.566019] list_add corruption. next->prev should be prev (ffff89ac4977e538), but was ffff89ac4763e018. (next=ffff89ac4763e018). [ 72.567647] ------------[ cut here ]------------ Fixes: a4abc6b12eb1 ("nfsd: Fix svc_xprt refcnt leak when setup callback client failed") Cc: Xiyu Yang Cc: J. 
Bruce Fields Signed-off-by: Dan Aloni Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1b57f2c2f0bb1..905d66acf6ab3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -988,7 +988,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c } else { if (!conn->cb_xprt) return -EINVAL; - clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; @@ -1008,6 +1007,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c rpc_shutdown_client(client); return -ENOMEM; } + + if (clp->cl_minorversion != 0) + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_client = client; clp->cl_cb_cred = cred; rcu_read_lock(); -- GitLab From e4412739472b743e18860ad8d979a7ceb3071652 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Oct 2022 03:18:41 +0900 Subject: [PATCH 565/694] Documentation: raise minimum supported version of binutils to 2.25 Binutils 2.23 was released in 2012. Almost 10 years old. We already require GCC 5.1, released in 2015. Bump the binutils version to 2.25, which was released some months before GCC 5.1. With this applied, some subsystems can start to clean up code. Examples: arch/arm/Kconfig.assembler arch/mips/vdso/Kconfig arch/powerpc/Makefile arch/x86/Kconfig.assembler Signed-off-by: Masahiro Yamada Acked-by: Linus Torvalds Reviewed-by: Nick Desaulniers --- Documentation/process/changes.rst | 4 ++-- scripts/min-tool-version.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 9844ca3a71a61..ef540865ad22e 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -35,7 +35,7 @@ Rust (optional) 1.62.0 rustc --version bindgen (optional) 0.56.0 bindgen --version GNU make 3.82 make --version bash 4.2 bash --version -binutils 2.23 ld -v +binutils 2.25 ld -v flex 2.5.35 flex --version bison 2.0 bison --version pahole 1.16 pahole --version @@ -119,7 +119,7 @@ Bash 4.2 or newer is needed. Binutils -------- -Binutils 2.23 or newer is needed to build the kernel. +Binutils 2.25 or newer is needed to build the kernel. pkg-config ---------- diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index 201bccfbc6788..a814f1efb39d5 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -14,7 +14,7 @@ fi case "$1" in binutils) - echo 2.23.0 + echo 2.25.0 ;; gcc) echo 5.1.0 -- GitLab From fccb3d3eda8d19b893e1fd18e8c70b78784b2a72 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 11:46:47 +0900 Subject: [PATCH 566/694] kbuild: add test-{ge,gt,le,lt} macros GNU Make 4.4 introduced $(intcmp ...), which is useful to compare two integers without forking a new process. Add test-{ge,gt,le,lt} macros, which work more efficiently with GNU Make >= 4.4. For older Make versions, they fall back to the 'test' shell command. The first two parameters to $(intcmp ...) must not be empty. To avoid the syntax error, I appended '0' to them. Fortunately, '00' is treated as '0'. This is needed because CONFIG options may expand to an empty string when the kernel configuration is not included. 
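
As an illustration (not part of the patch itself), a toy fragment for
GNU Make >= 4.4 showing why the appended '0' matters; the variable name
is made up:

  # MAYBE_EMPTY stands in for a CONFIG_ option that expands to nothing
  MAYBE_EMPTY :=
  # "$(MAYBE_EMPTY)0" becomes "0", so $(intcmp ...) never sees an empty operand
  result := $(intcmp $(MAYBE_EMPTY)0,1500000,lower,equal,higher)
  $(info result is $(result))   # prints "result is lower"
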
Signed-off-by: Masahiro Yamada Acked-by: Palmer Dabbelt # RISC-V Reviewed-by: Nathan Chancellor Reviewed-by: Nicolas Schier --- Makefile | 2 +- arch/riscv/Makefile | 2 +- arch/x86/Makefile | 2 +- scripts/Kbuild.include | 16 ++++++++++++++++ scripts/Makefile.compiler | 4 ++-- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 6b047daa46cc1..ff36288ae671f 100644 --- a/Makefile +++ b/Makefile @@ -994,7 +994,7 @@ KBUILD_LDFLAGS += -mllvm -import-instr-limit=5 # Check for frame size exceeding threshold during prolog/epilog insertion # when using lld < 13.0.0. ifneq ($(CONFIG_FRAME_WARN),0) -ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) +ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 130000),y) KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN) endif endif diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 0d13b597cb55f..faf2c2177094b 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -37,7 +37,7 @@ else endif ifeq ($(CONFIG_LD_IS_LLD),y) -ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 150000; echo $$?),0) +ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y) KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax ifndef CONFIG_AS_IS_LLVM diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 415a5d138de47..e72c7a49cd59f 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -211,7 +211,7 @@ endif KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) ifdef CONFIG_LTO_CLANG -ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) +ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 130000),y) KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) endif endif diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index cbe28744637b7..3be7c2d756679 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -11,6 +11,22 @@ space := $(empty) $(empty) space_escape := _-_SPACE_-_ pound := \# +### +# Comparison macros. +# Usage: $(call test-lt, $(CONFIG_LLD_VERSION), 150000) +# +# Use $(intcmp ...) if supported. (Make >= 4.4) +# Otherwise, fall back to the 'test' shell command. +ifeq ($(intcmp 1,0,,,y),y) +test-ge = $(intcmp $(strip $1)0, $(strip $2)0,,y,y) +test-gt = $(intcmp $(strip $1)0, $(strip $2)0,,,y) +else +test-ge = $(shell test $(strip $1)0 -ge $(strip $2)0 && echo y) +test-gt = $(shell test $(strip $1)0 -gt $(strip $2)0 && echo y) +endif +test-le = $(call test-ge, $2, $1) +test-lt = $(call test-gt, $2, $1) + ### # Name of target with a '.' as filename prefix. 
foo/bar.o => foo/.bar.o dot-target = $(dir $@).$(notdir $@) diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler index 20d353dcabfbc..3d8adfd34af1b 100644 --- a/scripts/Makefile.compiler +++ b/scripts/Makefile.compiler @@ -63,11 +63,11 @@ cc-disable-warning = $(call try-run,\ # gcc-min-version # Usage: cflags-$(call gcc-min-version, 70100) += -foo -gcc-min-version = $(shell [ $(CONFIG_GCC_VERSION)0 -ge $(1)0 ] && echo y) +gcc-min-version = $(call test-ge, $(CONFIG_GCC_VERSION), $1) # clang-min-version # Usage: cflags-$(call clang-min-version, 110000) += -foo -clang-min-version = $(shell [ $(CONFIG_CLANG_VERSION)0 -ge $(1)0 ] && echo y) +clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1) # ld-option # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) -- GitLab From a5db80c65dbf9144de155f8a0f08becc9c307db0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 18:49:18 +0900 Subject: [PATCH 567/694] kbuild: do not sort after reading modules.order modules.order lists modules in the deterministic order (that is why "modules order"), and there is no duplication in the list. $(sort ) is pointless. Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/Makefile.modfinal | 2 +- scripts/Makefile.modinst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 25bedd83644b0..4705d32388f36 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -13,7 +13,7 @@ include $(srctree)/scripts/Kbuild.include include $(srctree)/scripts/Makefile.lib # find all modules listed in modules.order -modules := $(sort $(shell cat $(MODORDER))) +modules := $(shell cat $(MODORDER)) __modfinal: $(modules) @: diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index a4c987c23750f..f4cff42069adf 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -9,7 +9,7 @@ __modinst: include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -modules := $(sort $(shell cat $(MODORDER))) +modules := $(shell cat $(MODORDER)) ifeq ($(KBUILD_EXTMOD),) dst := $(MODLIB)/kernel -- GitLab From 6768fa4bcb6c1618248f135d04b9287ba2724ae0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 11:54:47 +0900 Subject: [PATCH 568/694] kbuild: add read-file macro Since GNU Make 4.2, $(file ...) supports the read operater '<', which is useful to read a file without forking a new process. No warning is shown even if the input file is missing. For older Make versions, it falls back to the cat command. 
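
As an illustration (not part of the patch itself), a toy fragment for
GNU Make >= 4.2; the file name is made up:

  # 'no-such-file' deliberately does not exist
  contents := $(file < no-such-file)
  # the read silently yields an empty string -- no diagnostic is printed,
  # which is why the cat fallback in the read-file macro also discards stderr
  $(info contents=[$(contents)])   # prints "contents=[]"
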
Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier Reviewed-by: Alexander Lobakin Tested-by: Alexander Lobakin --- Makefile | 2 +- scripts/Kbuild.include | 14 ++++++++++++++ scripts/Makefile.modfinal | 2 +- scripts/Makefile.modinst | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index ff36288ae671f..591485152a95c 100644 --- a/Makefile +++ b/Makefile @@ -376,7 +376,7 @@ else # !mixed-build include $(srctree)/scripts/Kbuild.include # Read KERNELRELEASE from include/config/kernel.release (if it exists) -KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) +KERNELRELEASE = $(call read-file, include/config/kernel.release) KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION) export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 3be7c2d756679..21e76ba0de170 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -10,6 +10,10 @@ empty := space := $(empty) $(empty) space_escape := _-_SPACE_-_ pound := \# +define newline + + +endef ### # Comparison macros. @@ -61,6 +65,16 @@ stringify = $(squote)$(quote)$1$(quote)$(squote) kbuild-dir = $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) kbuild-file = $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile) +### +# Read a file, replacing newlines with spaces +# +# Make 4.2 or later can read a file by using its builtin function. +ifneq ($(filter-out 3.% 4.0 4.1, $(MAKE_VERSION)),) +read-file = $(subst $(newline),$(space),$(file < $1)) +else +read-file = $(shell cat $1 2>/dev/null) +endif + ### # Easy method for doing a status message kecho := : diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 4705d32388f36..83f2797e530c5 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -13,7 +13,7 @@ include $(srctree)/scripts/Kbuild.include include $(srctree)/scripts/Makefile.lib # find all modules listed in modules.order -modules := $(shell cat $(MODORDER)) +modules := $(call read-file, $(MODORDER)) __modfinal: $(modules) @: diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index f4cff42069adf..65aac6be78ec3 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -9,7 +9,7 @@ __modinst: include include/config/auto.conf include $(srctree)/scripts/Kbuild.include -modules := $(shell cat $(MODORDER)) +modules := $(call read-file, $(MODORDER)) ifeq ($(KBUILD_EXTMOD),) dst := $(MODLIB)/kernel -- GitLab From 3122c84409d578a5df8bcb1953547e0b871ac4c2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 11:54:48 +0900 Subject: [PATCH 569/694] kconfig: refactor Makefile to reduce process forks Refactor Makefile and use read-file macro. For Make >= 4.2, it can read out a file by using the built-in function. 
Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/kconfig/.gitignore | 4 +++- scripts/kconfig/Makefile | 45 +++++++++++++++++++----------------- scripts/kconfig/gconf-cfg.sh | 7 ++++-- scripts/kconfig/mconf-cfg.sh | 25 +++++++++++--------- scripts/kconfig/nconf-cfg.sh | 23 ++++++++++-------- scripts/kconfig/qconf-cfg.sh | 10 +++++--- scripts/remove-stale-files | 2 ++ 7 files changed, 68 insertions(+), 48 deletions(-) diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore index 500e7424b3ef9..c8a3f9cd52f02 100644 --- a/scripts/kconfig/.gitignore +++ b/scripts/kconfig/.gitignore @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only /conf /[gmnq]conf -/[gmnq]conf-cfg +/[gmnq]conf-cflags +/[gmnq]conf-libs +/qconf-bin /qconf-moc.cc diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index b8ef0fb4bbef7..0b1d15efaeb0c 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -159,11 +159,12 @@ conf-objs := conf.o $(common-objs) hostprogs += nconf nconf-objs := nconf.o nconf.gui.o $(common-objs) -HOSTLDLIBS_nconf = $(shell . $(obj)/nconf-cfg && echo $$libs) -HOSTCFLAGS_nconf.o = $(shell . $(obj)/nconf-cfg && echo $$cflags) -HOSTCFLAGS_nconf.gui.o = $(shell . $(obj)/nconf-cfg && echo $$cflags) +HOSTLDLIBS_nconf = $(call read-file, $(obj)/nconf-libs) +HOSTCFLAGS_nconf.o = $(call read-file, $(obj)/nconf-cflags) +HOSTCFLAGS_nconf.gui.o = $(call read-file, $(obj)/nconf-cflags) -$(obj)/nconf.o $(obj)/nconf.gui.o: $(obj)/nconf-cfg +$(obj)/nconf: | $(obj)/nconf-libs +$(obj)/nconf.o $(obj)/nconf.gui.o: | $(obj)/nconf-cflags # mconf: Used for the menuconfig target based on lxdialog hostprogs += mconf @@ -171,27 +172,28 @@ lxdialog := $(addprefix lxdialog/, \ checklist.o inputbox.o menubox.o textbox.o util.o yesno.o) mconf-objs := mconf.o $(lxdialog) $(common-objs) -HOSTLDLIBS_mconf = $(shell . $(obj)/mconf-cfg && echo $$libs) +HOSTLDLIBS_mconf = $(call read-file, $(obj)/mconf-libs) $(foreach f, mconf.o $(lxdialog), \ - $(eval HOSTCFLAGS_$f = $$(shell . $(obj)/mconf-cfg && echo $$$$cflags))) + $(eval HOSTCFLAGS_$f = $$(call read-file, $(obj)/mconf-cflags))) -$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/mconf-cfg +$(obj)/mconf: | $(obj)/mconf-libs +$(addprefix $(obj)/, mconf.o $(lxdialog)): | $(obj)/mconf-cflags # qconf: Used for the xconfig target based on Qt hostprogs += qconf qconf-cxxobjs := qconf.o qconf-moc.o qconf-objs := images.o $(common-objs) -HOSTLDLIBS_qconf = $(shell . $(obj)/qconf-cfg && echo $$libs) -HOSTCXXFLAGS_qconf.o = $(shell . $(obj)/qconf-cfg && echo $$cflags) -HOSTCXXFLAGS_qconf-moc.o = $(shell . $(obj)/qconf-cfg && echo $$cflags) - -$(obj)/qconf.o: $(obj)/qconf-cfg +HOSTLDLIBS_qconf = $(call read-file, $(obj)/qconf-libs) +HOSTCXXFLAGS_qconf.o = -std=c++11 -fPIC $(call read-file, $(obj)/qconf-cflags) +HOSTCXXFLAGS_qconf-moc.o = -std=c++11 -fPIC $(call read-file, $(obj)/qconf-cflags) +$(obj)/qconf: | $(obj)/qconf-libs +$(obj)/qconf.o $(obj)/qconf-moc.o: | $(obj)/qconf-cflags quiet_cmd_moc = MOC $@ - cmd_moc = $(shell . $(obj)/qconf-cfg && echo $$moc) $< -o $@ + cmd_moc = $(call read-file, $(obj)/qconf-bin)/moc $< -o $@ -$(obj)/qconf-moc.cc: $(src)/qconf.h $(obj)/qconf-cfg FORCE +$(obj)/qconf-moc.cc: $(src)/qconf.h FORCE | $(obj)/qconf-bin $(call if_changed,moc) targets += qconf-moc.cc @@ -200,15 +202,16 @@ targets += qconf-moc.cc hostprogs += gconf gconf-objs := gconf.o images.o $(common-objs) -HOSTLDLIBS_gconf = $(shell . $(obj)/gconf-cfg && echo $$libs) -HOSTCFLAGS_gconf.o = $(shell . 
$(obj)/gconf-cfg && echo $$cflags) +HOSTLDLIBS_gconf = $(call read-file, $(obj)/gconf-libs) +HOSTCFLAGS_gconf.o = $(call read-file, $(obj)/gconf-cflags) -$(obj)/gconf.o: $(obj)/gconf-cfg +$(obj)/gconf: | $(obj)/gconf-libs +$(obj)/gconf.o: | $(obj)/gconf-cflags # check if necessary packages are available, and configure build flags -filechk_conf_cfg = $(CONFIG_SHELL) $< +cmd_conf_cfg = $< $(addprefix $(obj)/$*conf-, cflags libs bin) -$(obj)/%conf-cfg: $(src)/%conf-cfg.sh FORCE - $(call filechk,conf_cfg) +$(obj)/%conf-cflags $(obj)/%conf-libs $(obj)/%conf-bin: $(src)/%conf-cfg.sh + $(call cmd,conf_cfg) -clean-files += *conf-cfg +clean-files += *conf-cflags *conf-libs *conf-bin diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh index cbd90c28c05f2..040d8f3388202 100755 --- a/scripts/kconfig/gconf-cfg.sh +++ b/scripts/kconfig/gconf-cfg.sh @@ -1,6 +1,9 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +cflags=$1 +libs=$2 + PKG="gtk+-2.0 gmodule-2.0 libglade-2.0" if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then @@ -26,5 +29,5 @@ if ! ${HOSTPKG_CONFIG} --atleast-version=2.0.0 gtk+-2.0; then exit 1 fi -echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" -echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" +${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} +${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh index 025b565e0b7cd..1e61f50a59050 100755 --- a/scripts/kconfig/mconf-cfg.sh +++ b/scripts/kconfig/mconf-cfg.sh @@ -1,19 +1,22 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +cflags=$1 +libs=$2 + PKG="ncursesw" PKG2="ncurses" if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then if ${HOSTPKG_CONFIG} --exists $PKG; then - echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" + ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} exit 0 fi - if ${HOSTPKG_CONFIG} --exists $PKG2; then - echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\" + if ${HOSTPKG_CONFIG} --exists ${PKG2}; then + ${HOSTPKG_CONFIG} --cflags ${PKG2} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG2} > ${libs} exit 0 fi fi @@ -22,22 +25,22 @@ fi # (Even if it is installed, some distributions such as openSUSE cannot # find ncurses by pkg-config.) if [ -f /usr/include/ncursesw/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\" - echo libs=\"-lncursesw\" + echo -D_GNU_SOURCE -I/usr/include/ncursesw > ${cflags} + echo -lncursesw > ${libs} exit 0 fi if [ -f /usr/include/ncurses/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncurses\" - echo libs=\"-lncurses\" + echo -D_GNU_SOURCE -I/usr/include/ncurses > ${cflags} + echo -lncurses > ${libs} exit 0 fi # As a final fallback before giving up, check if $HOSTCC knows of a default # ncurses installation (e.g. from a vendor-specific sysroot). 
if echo '#include ' | ${HOSTCC} -E - >/dev/null 2>&1; then - echo cflags=\"-D_GNU_SOURCE\" - echo libs=\"-lncurses\" + echo -D_GNU_SOURCE > ${cflags} + echo -lncurses > ${libs} exit 0 fi diff --git a/scripts/kconfig/nconf-cfg.sh b/scripts/kconfig/nconf-cfg.sh index 3a10bac2adb3a..f871a2160e363 100755 --- a/scripts/kconfig/nconf-cfg.sh +++ b/scripts/kconfig/nconf-cfg.sh @@ -1,19 +1,22 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +cflags=$1 +libs=$2 + PKG="ncursesw menuw panelw" PKG2="ncurses menu panel" if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then if ${HOSTPKG_CONFIG} --exists $PKG; then - echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" + ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} exit 0 fi if ${HOSTPKG_CONFIG} --exists $PKG2; then - echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\" + ${HOSTPKG_CONFIG} --cflags ${PKG2} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG2} > ${libs} exit 0 fi fi @@ -22,20 +25,20 @@ fi # (Even if it is installed, some distributions such as openSUSE cannot # find ncurses by pkg-config.) if [ -f /usr/include/ncursesw/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\" - echo libs=\"-lncursesw -lmenuw -lpanelw\" + echo -D_GNU_SOURCE -I/usr/include/ncursesw > ${cflags} + echo -lncursesw -lmenuw -lpanelw > ${libs} exit 0 fi if [ -f /usr/include/ncurses/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncurses\" - echo libs=\"-lncurses -lmenu -lpanel\" + echo -D_GNU_SOURCE -I/usr/include/ncurses > ${cflags} + echo -lncurses -lmenu -lpanel > ${libs} exit 0 fi if [ -f /usr/include/ncurses.h ]; then - echo cflags=\"-D_GNU_SOURCE\" - echo libs=\"-lncurses -lmenu -lpanel\" + echo -D_GNU_SOURCE > ${cflags} + echo -lncurses -lmenu -lpanel > ${libs} exit 0 fi diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh index ad652cb539474..117f36e568fc5 100755 --- a/scripts/kconfig/qconf-cfg.sh +++ b/scripts/kconfig/qconf-cfg.sh @@ -1,6 +1,10 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +cflags=$1 +libs=$2 +bin=$3 + PKG="Qt5Core Qt5Gui Qt5Widgets" if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then @@ -11,9 +15,9 @@ if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then fi if ${HOSTPKG_CONFIG} --exists $PKG; then - echo cflags=\"-std=c++11 -fPIC $(${HOSTPKG_CONFIG} --cflags $PKG)\" - echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" - echo moc=\"$(${HOSTPKG_CONFIG} --variable=host_bins Qt5Core)/moc\" + ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags} + ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs} + ${HOSTPKG_CONFIG} --variable=host_bins Qt5Core > ${bin} exit 0 fi diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files index ccadfa3afb2b8..64b14aa5aebf4 100755 --- a/scripts/remove-stale-files +++ b/scripts/remove-stale-files @@ -47,3 +47,5 @@ rm -f arch/riscv/purgatory/kexec-purgatory.c rm -f scripts/extract-cert rm -f arch/x86/purgatory/kexec-purgatory.c + +rm -f scripts/kconfig/[gmnq]conf-cfg -- GitLab From 875ef1a57f32fcb91010dc9bc8bd1166956a579e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 12:10:59 +0900 Subject: [PATCH 570/694] kbuild: use .NOTINTERMEDIATE for future GNU Make versions In Kbuild, some files are generated by chains of pattern/implicit rules. 
For example, *.dtb.o files in drivers/of/unittest-data/Makefile are generated
by the chain of 3 pattern rules, like this:

    %.dts  ->  %.dtb  ->  %.dtb.S  ->  %.dtb.o

Here, %.dts is the real source, %.dtb.o is the final target.
%.dtb and %.dtb.S are called "intermediate files".

As the GNU Make manual [1] says, intermediate files are treated differently
in two ways:

(a) The first difference is what happens if the intermediate file does not
    exist. If an ordinary file 'b' does not exist, and make considers a
    target that depends on 'b', it invariably creates 'b' and then updates
    the target from 'b'. But if 'b' is an intermediate file, then make can
    leave well enough alone: it won't create 'b' unless one of its
    prerequisites is out of date. This means the target depending on 'b'
    won't be rebuilt either, unless there is some other reason to update
    that target: for example the target doesn't exist or a different
    prerequisite is newer than the target.

(b) The second difference is that if make does create 'b' in order to update
    something else, it deletes 'b' later on after it is no longer needed.
    Therefore, an intermediate file which did not exist before make also
    does not exist after make. make reports the deletion to you by printing
    a 'rm' command showing which file it is deleting.

The combination of these is problematic for Kbuild because most of the build
rules depend on FORCE and the if_changed* macros really determine if the
target should be updated. So, all missing files, whether they are
intermediate or not, are always rebuilt.

To see the problem, delete ".SECONDARY:" from scripts/Kbuild.include, and
repeat this command:

    $ make allmodconfig drivers/of/unittest-data/

The intermediate files will be deleted, which results in rebuilding
intermediate and final objects in the next run of make.

In the old days, people suppressed (b) in inconsistent ways. As commit
54a702f70589 ("kbuild: mark $(targets) as .SECONDARY and remove .PRECIOUS
markers") noted, you should not use .PRECIOUS because .PRECIOUS has the
following behavior (c), which is not likely what you want.

(c) If make is killed or interrupted during the execution of their recipes,
    the target is not deleted. Also, the target is not deleted on error even
    if .DELETE_ON_ERROR is specified.

.SECONDARY is a much better way to disable (b), but a small problem is that
.SECONDARY enables (a), which gives a side-effect to $?; prerequisites marked
as .SECONDARY do not appear in $?. This is a drawback for Kbuild.

I thought it was a bug and opened a bug report. As Paul, the GNU Make
maintainer, concluded in [2], this is not a bug.

The good news is that GNU Make 4.4 added the perfect solution,
.NOTINTERMEDIATE, which cancels both (a) and (b).

For clarification, my understanding of .INTERMEDIATE, .SECONDARY, .PRECIOUS
and .NOTINTERMEDIATE is as follows:

                       (a)        (b)        (c)
  .INTERMEDIATE      enable     enable     disable
  .SECONDARY         enable     disable    disable
  .PRECIOUS          disable    disable    enable
  .NOTINTERMEDIATE   disable    disable    disable

However, GNU Make 4.4 has a bug for the global .NOTINTERMEDIATE. [3]
It was fixed by commit 6164608900ad ("[SV 63417] Ensure global
.NOTINTERMEDIATE disables all intermediates"), and will be available in the
next release of GNU Make.

The following is the gain for .NOTINTERMEDIATE:

  [Current Make]

    $ make allnoconfig vmlinux
      [ full build ]
    $ rm include/linux/device.h
    $ make vmlinux
      CALL    scripts/checksyscalls.sh

  Make does not notice the removal of <linux/device.h>.
  [Future Make]

    $ make-latest allnoconfig vmlinux
      [ full build ]
    $ rm include/linux/device.h
    $ make-latest vmlinux
      CC      arch/x86/kernel/asm-offsets.s
    In file included from ./include/linux/writeback.h:13,
                     from ./include/linux/memcontrol.h:22,
                     from ./include/linux/swap.h:9,
                     from ./include/linux/suspend.h:5,
                     from arch/x86/kernel/asm-offsets.c:13:
    ./include/linux/blk_types.h:11:10: fatal error: linux/device.h: No such file or directory
       11 | #include <linux/device.h>
          |          ^~~~~~~~~~~~~~~~
    compilation terminated.
    make-latest[1]: *** [scripts/Makefile.build:114: arch/x86/kernel/asm-offsets.s] Error 1
    make-latest: *** [Makefile:1282: prepare0] Error 2

  Make notices the removal of <linux/device.h>, and rebuilds objects that
  depended on <linux/device.h>. There exists a source file that includes
  <linux/device.h>, and it raises an error.

To see detailed background information, refer to commit 2d3b1b8f0da7
("kbuild: drop $(wildcard $^) check in if_changed* for faster rebuild").

[1]: https://www.gnu.org/software/make/manual/make.html#Chained-Rules
[2]: https://savannah.gnu.org/bugs/?55532
[3]: https://savannah.gnu.org/bugs/?63417

Signed-off-by: Masahiro Yamada
---
 scripts/Kbuild.include | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 21e76ba0de170..2f7356b2990bb 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -185,9 +185,6 @@ endif
 make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1)))))
 # Find any prerequisites that are newer than target or that do not exist.
-# (This is not true for now; $? should contain any non-existent prerequisites,
-# but it does not work as expected when .SECONDARY is present. This seems a bug
-# of GNU Make.)
 # PHONY targets skipped in both cases.
 newer-prereqs = $(filter-out $(PHONY),$?)
@@ -263,4 +260,14 @@ endif
 .DELETE_ON_ERROR:
 # do not delete intermediate files automatically
+#
+# .NOTINTERMEDIATE is more correct, but only available on newer Make versions.
+# Make 4.4 introduced .NOTINTERMEDIATE, and it appears in .FEATURES, but the
+# global .NOTINTERMEDIATE does not work. We can use it on Make > 4.4.
+# Use .SECONDARY for older Make versions, but "newer-prereq" cannot detect
+# deleted files.
+ifneq ($(and $(filter notintermediate, $(.FEATURES)),$(filter-out 4.4,$(MAKE_VERSION))),)
+.NOTINTERMEDIATE:
+else
 .SECONDARY:
+endif
--
GitLab

From 508f28c67171e276356650f407dd87d42b6913ef Mon Sep 17 00:00:00 2001
From: Youling Tang
Date: Sat, 10 Dec 2022 22:39:48 +0800
Subject: [PATCH 571/694] LoongArch: Consolidate __ex_table construction

Consolidate all the __ex_table construction code with a _ASM_EXTABLE or
_asm_extable helper.

There should be no functional change as a result of this patch.
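As background for the helpers being consolidated here, the following is a
minimal user-space sketch of the __ex_table idea (illustration only, not
kernel code, with made-up addresses): each entry pairs the address of an
instruction that may fault with the address of its fixup, and the fault
handler looks up the faulting PC and resumes execution at the fixup, much as
fixup_exception() updates regs->csr_era on LoongArch.

#include <stdio.h>

/* Simplified model of one absolute-address __ex_table entry: the address of
 * a potentially faulting instruction and the address of its fixup code. */
struct exception_table_entry {
	unsigned long insn;
	unsigned long fixup;
};

/* Made-up "addresses" standing in for real instruction locations. */
#define LOAD_INSN_PC	0x1000UL
#define FIXUP_PC	0x2000UL

/* What a use of _asm_extable/_ASM_EXTABLE conceptually records. */
static const struct exception_table_entry ex_table[] = {
	{ .insn = LOAD_INSN_PC, .fixup = FIXUP_PC },
};

/* On a fault, search the table for the faulting PC; if an entry exists,
 * execution resumes at the fixup instead of killing the kernel. */
static unsigned long fixup_exception_demo(unsigned long fault_pc)
{
	unsigned int i;

	for (i = 0; i < sizeof(ex_table) / sizeof(ex_table[0]); i++)
		if (ex_table[i].insn == fault_pc)
			return ex_table[i].fixup;
	return 0;	/* no fixup found: a real kernel would oops here */
}

int main(void)
{
	printf("fault at %#lx -> resume at %#lx\n",
	       LOAD_INSN_PC, fixup_exception_demo(LOAD_INSN_PC));
	return 0;
}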
Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 35 ++++++++++++++++++++++++ arch/loongarch/include/asm/futex.h | 13 ++++----- arch/loongarch/include/asm/uaccess.h | 9 ++---- arch/loongarch/kernel/fpu.S | 5 ++-- arch/loongarch/lib/clear_user.S | 5 ++-- arch/loongarch/lib/copy_user.S | 5 ++-- 6 files changed, 49 insertions(+), 23 deletions(-) create mode 100644 arch/loongarch/include/asm/asm-extable.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h new file mode 100644 index 0000000000000..4f615bf56727e --- /dev/null +++ b/arch/loongarch/include/asm/asm-extable.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + .pushsection __ex_table, "a"; \ + .balign 8; \ + .quad (insn); \ + .quad (fixup); \ + .popsection; + + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup) + .endm + +#else /* __ASSEMBLY__ */ + +#include +#include + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + ".pushsection __ex_table, \"a\"\n" \ + ".balign 8\n" \ + ".quad ((" insn "))\n" \ + ".quad ((" fixup "))\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup) + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index feb6658c84ff8..bdcd1c6132998 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -22,10 +23,8 @@ "4: li.w %0, %6 \n" \ " b 3b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " "__UA_ADDR "\t1b, 4b \n" \ - " "__UA_ADDR "\t2b, 4b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ : "=r" (ret), "=&r" (oldval), \ "=ZC" (*uaddr) \ : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ @@ -90,10 +89,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv "4: li.d %0, %6 \n" " b 3b \n" " .previous \n" - " .section __ex_table,\"a\" \n" - " "__UA_ADDR "\t1b, 4b \n" - " "__UA_ADDR "\t2b, 4b \n" - " .previous \n" + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index a8ae2af4025ab..bf9a4e218ac0e 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -165,9 +166,7 @@ do { \ " move %1, $zero \n" \ " b 2b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " "__UA_ADDR "\t1b, 3b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (__gu_err), "=r" (__gu_tmp) \ : "m" (__m(ptr)), "i" (-EFAULT)); \ \ @@ -196,9 +195,7 @@ do { \ "3: li.w %0, %3 \n" \ " b 2b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " " __UA_ADDR " 1b, 3b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (__pu_err), "=m" (__m(ptr)) \ : "Jr" (__pu_val), "i" (-EFAULT)); \ } diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 576b3370a296d..ccde94140c896 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include @@ -21,9 +22,7 @@ .macro EX insn, reg, src, offs .ex\@: \insn 
\reg, \src, \offs
- .section __ex_table,"a"
- PTR .ex\@, fault
- .previous
+ _asm_extable .ex\@, fault
 .endm
 .macro sc_save_fp base
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 16ba2b8dd68ad..7a066d6a41b80 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -5,6 +5,7 @@
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -15,9 +16,7 @@
 jr ra
 .previous
 .endif
- .section __ex_table, "a"
- PTR \from\()b, \to\()b
- .previous
+ _asm_extable \from\()b, \to\()b
 .endm
 /*
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 97d20327a69ee..f8ace04586c26 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -5,6 +5,7 @@
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -15,9 +16,7 @@
 jr ra
 .previous
 .endif
- .section __ex_table, "a"
- PTR \from\()b, \to\()b
- .previous
+ _asm_extable \from\()b, \to\()b
 .endm
 /*
--
GitLab

From 3d36f4298ba91fbdec6bc56aa7bb0663cba6ab0c Mon Sep 17 00:00:00 2001
From: Youling Tang
Date: Sat, 10 Dec 2022 22:39:48 +0800
Subject: [PATCH 572/694] LoongArch: Switch to relative exception tables

Similar to other architectures such as arm64, x86, riscv and so on, use
offsets relative to the exception table entry values rather than their
absolute addresses for both the exception location and the fixup.

However, on LoongArch a label difference will actually produce two
relocations, a pair of R_LARCH_ADD32 and R_LARCH_SUB32. Take the simple
code below for example:

$ cat test_ex_table.S
.section .text
1: nop
.section __ex_table,"a"
.balign 4
.long (1b - .)
.previous

$ loongarch64-unknown-linux-gnu-gcc -c test_ex_table.S
$ loongarch64-unknown-linux-gnu-readelf -Wr test_ex_table.o
Relocation section '.rela__ex_table' at offset 0x100 contains 2 entries:
    Offset            Info              Type           Symbol's Value   Symbol's Name + Addend
0000000000000000  0000000600000032  R_LARCH_ADD32      0000000000000000  .L1^B1 + 0
0000000000000000  0000000500000037  R_LARCH_SUB32      0000000000000000  L0^A + 0

modpost will complain about the R_LARCH_SUB32 relocation, so we need to
patch modpost.c to skip this relocation for the .rela__ex_table section.
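To make the relative encoding concrete, here is a small stand-alone C sketch
(illustration only, not kernel code): each 32-bit field stores the signed
distance from the field itself to its target, and the absolute address is
recovered the same way get_ex_fixup() does below. It assumes all objects sit
within +/-2GB of each other, which is also what the kernel relies on.

#include <stdio.h>
#include <stdint.h>

/* Relative-offset entry, as introduced by this patch. */
struct exception_table_entry {
	int insn;
	int fixup;
};

/* Encode: store target - &field, mirroring ".long (1b - .)" in assembly. */
static void set_entry(struct exception_table_entry *ex,
		      uintptr_t insn_addr, uintptr_t fixup_addr)
{
	ex->insn  = (int)(insn_addr  - (uintptr_t)&ex->insn);
	ex->fixup = (int)(fixup_addr - (uintptr_t)&ex->fixup);
}

/* Decode: recover the absolute addresses again. */
static uintptr_t get_ex_insn(const struct exception_table_entry *ex)
{
	return (uintptr_t)&ex->insn + ex->insn;
}

static uintptr_t get_ex_fixup(const struct exception_table_entry *ex)
{
	return (uintptr_t)&ex->fixup + ex->fixup;
}

int main(void)
{
	/* static, so the entry and the "code" stay within 32-bit offset range */
	static char insn_stub, fixup_stub;
	static struct exception_table_entry ex;

	set_entry(&ex, (uintptr_t)&insn_stub, (uintptr_t)&fixup_stub);
	printf("insn  recovered: %d\n", get_ex_insn(&ex)  == (uintptr_t)&insn_stub);
	printf("fixup recovered: %d\n", get_ex_fixup(&ex) == (uintptr_t)&fixup_stub);
	return 0;
}

Besides being position-independent, each entry shrinks from two 8-byte
absolute addresses to two 4-byte offsets, which is why the .quad/.balign 8
directives become .long/.balign 4 in asm-extable.h.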
Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 12 ++++----- arch/loongarch/include/asm/extable.h | 26 +++++++++++++++++++ arch/loongarch/include/asm/uaccess.h | 2 +- arch/loongarch/mm/extable.c | 32 ++++++++++++++++-------- scripts/mod/modpost.c | 13 ++++++++++ scripts/sorttable.c | 2 +- 6 files changed, 69 insertions(+), 18 deletions(-) create mode 100644 arch/loongarch/include/asm/extable.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 4f615bf56727e..74f8bc75472ac 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -6,9 +6,9 @@ #define __ASM_EXTABLE_RAW(insn, fixup) \ .pushsection __ex_table, "a"; \ - .balign 8; \ - .quad (insn); \ - .quad (fixup); \ + .balign 4; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ .popsection; .macro _asm_extable, insn, fixup @@ -22,9 +22,9 @@ #define __ASM_EXTABLE_RAW(insn, fixup) \ ".pushsection __ex_table, \"a\"\n" \ - ".balign 8\n" \ - ".quad ((" insn "))\n" \ - ".quad ((" fixup "))\n" \ + ".balign 4\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h new file mode 100644 index 0000000000000..b571c89705d1e --- /dev/null +++ b/arch/loongarch/include/asm/extable.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_EXTABLE_H +#define _ASM_LOONGARCH_EXTABLE_H + +/* + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. 
+ */ + +struct exception_table_entry { + int insn, fixup; +}; + +#define ARCH_HAS_RELATIVE_EXTABLE + +bool fixup_exception(struct pt_regs *regs); + +#endif diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index bf9a4e218ac0e..e33282e0bdef2 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -15,8 +15,8 @@ #include #include #include +#include #include -#include #include extern u64 __ua_limit; diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index bc20988f2b87c..08a9a7d6357a7 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -3,20 +3,32 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include -#include -#include #include +#include +#include + +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) +{ + return ((unsigned long)&ex->fixup + ex->fixup); +} -int fixup_exception(struct pt_regs *regs) +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) { - const struct exception_table_entry *fixup; + regs->csr_era = get_ex_fixup(ex); - fixup = search_exception_tables(exception_era(regs)); - if (fixup) { - regs->csr_era = fixup->fixup; + return true; +} + + +bool fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; - return 1; - } + ex = search_exception_tables(exception_era(regs)); + if (!ex) + return false; - return 0; + return ex_handler_fixup(ex, regs); } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 2c80da0220c32..9321c0a05ffdc 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1523,6 +1523,14 @@ static int addend_mips_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) #define R_RISCV_SUB32 39 #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif + +#ifndef R_LARCH_SUB32 +#define R_LARCH_SUB32 55 +#endif + static void section_rela(const char *modname, struct elf_info *elf, Elf_Shdr *sechdr) { @@ -1564,6 +1572,11 @@ static void section_rela(const char *modname, struct elf_info *elf, ELF_R_TYPE(r.r_info) == R_RISCV_SUB32) continue; break; + case EM_LOONGARCH: + if (!strcmp("__ex_table", fromsec) && + ELF_R_TYPE(r.r_info) == R_LARCH_SUB32) + continue; + break; } sym = elf->symtab_start + r_sym; /* Skip special sections */ diff --git a/scripts/sorttable.c b/scripts/sorttable.c index fba40e99f3541..0f2beda804789 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -312,12 +312,12 @@ static int do_file(char const *const fname, void *addr) case EM_PARISC: case EM_PPC: case EM_PPC64: + case EM_LOONGARCH: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: case EM_ARCV2: case EM_ARM: - case EM_LOONGARCH: case EM_MICROBLAZE: case EM_MIPS: case EM_XTENSA: -- GitLab From 26bc82441250f2e01621f5b26606a4f6926ee3ad Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 573/694] LoongArch: extable: Add `type` and `data` fields This is a LoongArch port of commit d6e2cc564775 ("arm64: extable: add `type` and `data` fields"). Subsequent patches will add specialized handlers for fixups, in addition to the simple PC fixup we have today. In preparation, this patch adds a new `type` field to struct exception_table_entry, and uses this to distinguish the fixup and other cases. A `data` field is also added so that subsequent patches can associate data specific to each exception site (e.g. register numbers). 
Handlers are named ex_handler_*() for consistency, following the example of x86. At the same time, get_ex_fixup() is split out into a helper so that it can be used by other ex_handler_*() functions in the subsequent patches. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 15 +++++++++++---- arch/loongarch/include/asm/extable.h | 11 +++++++++++ arch/loongarch/kernel/vmlinux.lds.S | 3 +-- arch/loongarch/mm/extable.c | 7 ++++++- scripts/sorttable.c | 2 +- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 74f8bc75472ac..634bd770e3c40 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -2,17 +2,22 @@ #ifndef __ASM_ASM_EXTABLE_H #define __ASM_ASM_EXTABLE_H +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 + #ifdef __ASSEMBLY__ -#define __ASM_EXTABLE_RAW(insn, fixup) \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ .balign 4; \ .long ((insn) - .); \ .long ((fixup) - .); \ + .short (type); \ + .short (data); \ .popsection; .macro _asm_extable, insn, fixup - __ASM_EXTABLE_RAW(\insn, \fixup) + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm #else /* __ASSEMBLY__ */ @@ -20,15 +25,17 @@ #include #include -#define __ASM_EXTABLE_RAW(insn, fixup) \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ ".balign 4\n" \ ".long ((" insn ") - .)\n" \ ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ - __ASM_EXTABLE_RAW(#insn, #fixup) + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h index b571c89705d1e..92612b4364a1d 100644 --- a/arch/loongarch/include/asm/extable.h +++ b/arch/loongarch/include/asm/extable.h @@ -17,10 +17,21 @@ struct exception_table_entry { int insn, fixup; + short type, data; }; #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index b3309a5e695b2..efecda0c23616 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -4,6 +4,7 @@ #include #define PAGE_SIZE _PAGE_SIZE +#define RO_EXCEPTION_TABLE_ALIGN 4 /* * Put .bss..swapper_pg_dir as the first thing in .bss. This will @@ -53,8 +54,6 @@ SECTIONS . 
= ALIGN(PECOFF_SEGMENT_ALIGN); _etext = .; - EXCEPTION_TABLE(16) - .got : ALIGN(16) { *(.got) } .plt : ALIGN(16) { *(.plt) } .got.plt : ALIGN(16) { *(.got.plt) } diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index 08a9a7d6357a7..fd2395221cffb 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -30,5 +30,10 @@ bool fixup_exception(struct pt_regs *regs) if (!ex) return false; - return ex_handler_fixup(ex, regs); + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + } + + BUG(); } diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 0f2beda804789..83cdb843d92f0 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -304,6 +304,7 @@ static int do_file(char const *const fname, void *addr) switch (r2(&ehdr->e_machine)) { case EM_386: case EM_AARCH64: + case EM_LOONGARCH: case EM_RISCV: case EM_S390: case EM_X86_64: @@ -312,7 +313,6 @@ static int do_file(char const *const fname, void *addr) case EM_PARISC: case EM_PPC: case EM_PPC64: - case EM_LOONGARCH: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: -- GitLab From 672999cfae3e830a64c4996362a26934fd555ff9 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 574/694] LoongArch: extable: Add a dedicated uaccess handler Inspired by commit 2e77a62cb3a6("arm64: extable: add a dedicated uaccess handler"), do similar to LoongArch to add a dedicated uaccess exception handler to update registers in exception context and subsequently return back into the function which faulted, so we remove the need for fixups specialized to each faulting instruction. Add gpr-num.h here because we need to map the same GPR names to integer constants, so that we can use this to build meta-data for the exception fixups. 
The compiler treats gpr 0 as zero rather than $r0, so set it separately to .L__gpr_num_zero, otherwise the following assembly error will occurs: {standard input}: Assembler messages: {standard input}:1074: Error: invalid operands (*UND* and *ABS* sections) for `<<' {standard input}:1160: Error: invalid operands (*UND* and *ABS* sections) for `<<' make[1]: *** [scripts/Makefile.build:249: fs/fcntl.o] Error 1 Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 22 ++++++++++++++++++++++ arch/loongarch/include/asm/futex.h | 22 ++++++---------------- arch/loongarch/include/asm/gpr-num.h | 22 ++++++++++++++++++++++ arch/loongarch/include/asm/uaccess.h | 17 ++++------------- arch/loongarch/mm/extable.c | 22 ++++++++++++++++++++++ 5 files changed, 76 insertions(+), 29 deletions(-) create mode 100644 arch/loongarch/include/asm/gpr-num.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 634bd770e3c40..f5502cb50c6ef 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -4,6 +4,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 +#define EX_TYPE_UACCESS_ERR_ZERO 2 #ifdef __ASSEMBLY__ @@ -24,6 +25,7 @@ #include #include +#include #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ @@ -37,6 +39,26 @@ #define _ASM_EXTABLE(insn, fixup) \ __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index bdcd1c6132998..042ca4448e4d3 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -19,16 +19,11 @@ "2: sc.w $t0, %2 \n" \ " beqz $t0, 1b \n" \ "3: \n" \ - " .section .fixup,\"ax\" \n" \ - "4: li.w %0, %6 \n" \ - " b 3b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \ : "=r" (ret), "=&r" (oldval), \ "=ZC" (*uaddr) \ - : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ - "i" (-EFAULT) \ + : "0" (0), "ZC" (*uaddr), "Jr" (oparg) \ : "memory", "t0"); \ } @@ -85,15 +80,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv " beqz $t0, 1b \n" "3: \n" __WEAK_LLSC_MB - " .section .fixup,\"ax\" \n" - "4: li.d %0, %6 \n" - " b 3b \n" - " .previous \n" - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) - : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), - "i" (-EFAULT) + : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval) : "memory", "t0"); *uval = val; diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h new file mode 100644 index 0000000000000..e0941af20c7e7 --- 
/dev/null +++ b/arch/loongarch/include/asm/gpr-num.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .equ .L__gpr_num_zero, 0 + .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + .equ .L__gpr_num_$r\num, \num + .endr + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .equ .L__gpr_num_zero, 0\n" \ +" .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ +" .equ .L__gpr_num_$r\\num, \\num\n" \ +" .endr\n" \ + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index e33282e0bdef2..255899d4a7c36 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -161,14 +161,9 @@ do { \ __asm__ __volatile__( \ "1: " insn " %1, %2 \n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - "3: li.w %0, %3 \n" \ - " move %1, $zero \n" \ - " b 2b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \ : "+r" (__gu_err), "=r" (__gu_tmp) \ - : "m" (__m(ptr)), "i" (-EFAULT)); \ + : "m" (__m(ptr))); \ \ (val) = (__typeof__(*(ptr))) __gu_tmp; \ } @@ -191,13 +186,9 @@ do { \ __asm__ __volatile__( \ "1: " insn " %z2, %1 # __put_user_asm\n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - "3: li.w %0, %3 \n" \ - " b 2b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \ : "+r" (__pu_err), "=m" (__m(ptr)) \ - : "Jr" (__pu_val), "i" (-EFAULT)); \ + : "Jr" (__pu_val)); \ } #define __get_kernel_nofault(dst, src, type, err_label) \ diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index fd2395221cffb..9b0cfd8989408 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include #include #include @@ -13,6 +14,13 @@ get_ex_fixup(const struct exception_table_entry *ex) return ((unsigned long)&ex->fixup + ex->fixup); } +static inline void regs_set_gpr(struct pt_regs *regs, + unsigned int offset, unsigned long val) +{ + if (offset && offset <= MAX_REG_OFFSET) + *(unsigned long *)((unsigned long)regs + offset) = val; +} + static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs) { @@ -21,6 +29,18 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, return true; } +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT); + regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0); + regs->csr_era = get_ex_fixup(ex); + + return true; +} bool fixup_exception(struct pt_regs *regs) { @@ -33,6 +53,8 @@ bool fixup_exception(struct pt_regs *regs) switch (ex->type) { case EX_TYPE_FIXUP: return ex_handler_fixup(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); } BUG(); -- GitLab From 912bcfaf36771a2bf7a83799ce5454850d1c3f40 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 575/694] LoongArch: Remove the .fixup section usage Use the `.L_xxx` label to improve fixup code and then remove the .fixup section usage. 
Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/lib/clear_user.S | 14 +++++--------- arch/loongarch/lib/copy_user.S | 16 ++++++---------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 7a066d6a41b80..d5c9e44ac8c4f 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -9,15 +9,11 @@ #include #include -.macro fixup_ex from, to, offset, fix -.if \fix - .section .fixup, "ax" -\to: addi.d a0, a1, \offset +.irp to, 0 +.L_fixup_handle_\to\(): + addi.d a0, a1, (\to) * (-8) jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm +.endr /* * unsigned long __clear_user(void *addr, size_t size) @@ -36,7 +32,7 @@ SYM_FUNC_START(__clear_user) 2: move a0, a1 jr ra - fixup_ex 1, 3, 0, 1 + _asm_extable 1b, .L_fixup_handle_0 SYM_FUNC_END(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index f8ace04586c26..61933d964da03 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -9,15 +9,11 @@ #include #include -.macro fixup_ex from, to, offset, fix -.if \fix - .section .fixup, "ax" -\to: addi.d a0, a2, \offset +.irp to, 0 +.L_fixup_handle_\to\(): + addi.d a0, a2, (\to) * (-8) jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm +.endr /* * unsigned long __copy_user(void *to, const void *from, size_t n) @@ -39,8 +35,8 @@ SYM_FUNC_START(__copy_user) 3: move a0, a2 jr ra - fixup_ex 1, 4, 0, 1 - fixup_ex 2, 4, 0, 0 + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_0 SYM_FUNC_END(__copy_user) EXPORT_SYMBOL(__copy_user) -- GitLab From dbcd7f5fafea64dbe588c4ec18bc309fde5d1e1c Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 576/694] LoongArch: BPF: Add BPF exception tables Inspired by commit 800834285361("bpf, arm64: Add BPF exception tables"), do similar to LoongArch to add BPF exception tables. When a tracing BPF program attempts to read memory without using the bpf_probe_read() helper, the verifier marks the load instruction with the BPF_PROBE_MEM flag. Since the LoongArch JIT does not currently recognize this flag it falls back to the interpreter. Add support for BPF_PROBE_MEM, by appending an exception table to the BPF program. If the load instruction causes a data abort, the fixup infrastructure finds the exception table and fixes up the fault, by clearing the destination register and jumping over the faulting instruction. To keep the compact exception table entry format, inspect the pc in fixup_exception(). A more generic solution would add a "handler" field to the table entry, like on x86, s390 and arm64, etc. 
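As an aside, the packed fixup word used by this patch (destination register
in the top five bits, positive offset to the fixup code in the low 27 bits,
cf. BPF_FIXUP_REG_MASK and BPF_FIXUP_OFFSET_MASK below) can be illustrated
with a small user-space sketch; plain shifts stand in for the kernel's
GENMASK()/FIELD_PREP()/FIELD_GET() helpers and the example values are made up.

#include <stdio.h>
#include <stdint.h>

/* Same layout as the patch: dst_reg in bits 31..27, offset in bits 26..0. */
#define FIXUP_REG_SHIFT		27
#define FIXUP_REG_MASK		0x1fu		/* 5 bits  */
#define FIXUP_OFFSET_MASK	0x07ffffffu	/* 27 bits */

static uint32_t pack_fixup(unsigned int dst_reg, uint32_t offset)
{
	return ((dst_reg & FIXUP_REG_MASK) << FIXUP_REG_SHIFT) |
	       (offset & FIXUP_OFFSET_MASK);
}

static void unpack_fixup(uint32_t fixup, unsigned int *dst_reg, uint32_t *offset)
{
	*dst_reg = (fixup >> FIXUP_REG_SHIFT) & FIXUP_REG_MASK;
	*offset  = fixup & FIXUP_OFFSET_MASK;
}

int main(void)
{
	unsigned int reg;
	uint32_t off;
	uint32_t word = pack_fixup(12, 0x123456);	/* made-up values */

	unpack_fixup(word, &reg, &off);
	printf("dst_reg=%u offset=%#x\n", reg, (unsigned int)off);
	return 0;
}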
Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 1 + arch/loongarch/include/asm/extable.h | 10 +++ arch/loongarch/mm/extable.c | 2 + arch/loongarch/net/bpf_jit.c | 86 ++++++++++++++++++++++-- arch/loongarch/net/bpf_jit.h | 2 + 5 files changed, 96 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index f5502cb50c6ef..df05005f2b80a 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -5,6 +5,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_UACCESS_ERR_ZERO 2 +#define EX_TYPE_BPF 3 #ifdef __ASSEMBLY__ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h index 92612b4364a1d..5abf29f1bc919 100644 --- a/arch/loongarch/include/asm/extable.h +++ b/arch/loongarch/include/asm/extable.h @@ -32,6 +32,16 @@ do { \ (b)->data = (tmp).data; \ } while (0) +#ifdef CONFIG_BPF_JIT +bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); +#else +static inline +bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + return false; +} +#endif /* !CONFIG_BPF_JIT */ + bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index 9b0cfd8989408..9ab69872dcffe 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -55,6 +55,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_UACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); } BUG(); diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index bdcd0c7719a9e..c4b1947ebf768 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -387,6 +387,65 @@ static bool is_signed_bpf_cond(u8 cond) cond == BPF_JSGE || cond == BPF_JSLE; } +#define BPF_FIXUP_REG_MASK GENMASK(31, 27) +#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) + +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); + off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); + + regs->regs[dst_reg] = 0; + regs->csr_era = (unsigned long)&ex->fixup - offset; + + return true; +} + +/* For accesses to BTF pointers, add an entry to the exception table */ +static int add_exception_handler(const struct bpf_insn *insn, + struct jit_ctx *ctx, + int dst_reg) +{ + unsigned long pc; + off_t offset; + struct exception_table_entry *ex; + + if (!ctx->image || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + return 0; + + if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->num_exentries]; + pc = (unsigned long)&ctx->image[ctx->idx - 1]; + + offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + + ex->insn = offset; + + /* + * Since the extable follows the program, the fixup offset is always + * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value + * to keep things simple, and put the destination register in the upper + * bits. We don't need to worry about buildtime or runtime sort + * modifying the upper bits because the table is already sorted, and + * isn't part of the main exception table. 
+ */ + offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) + return -ERANGE; + + ex->type = EX_TYPE_BPF; + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + + ctx->num_exentries++; + + return 0; +} + static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) { u8 tm = -1; @@ -816,6 +875,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: switch (BPF_SIZE(code)) { case BPF_B: if (is_signed_imm12(off)) { @@ -854,6 +917,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; /* *(size *)(dst + off) = imm */ @@ -1018,6 +1085,9 @@ static int validate_code(struct jit_ctx *ctx) return -1; } + if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries)) + return -1; + return 0; } @@ -1025,7 +1095,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; u8 *image_ptr; - int image_size; + int image_size, prog_size, extable_size; struct jit_ctx ctx; struct jit_data *jit_data; struct bpf_binary_header *header; @@ -1066,7 +1136,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image_ptr = jit_data->image; header = jit_data->header; extra_pass = true; - image_size = sizeof(u32) * ctx.idx; + prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; } @@ -1088,12 +1158,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry); + /* Now we know the actual image size. * As each LoongArch instruction is of length 32bit, * we are translating number of JITed intructions into * the size required to store these JITed code. */ - image_size = sizeof(u32) * ctx.idx; + prog_size = sizeof(u32) * ctx.idx; + image_size = prog_size + extable_size; /* Now we know the size of the structure to make */ header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); @@ -1104,9 +1177,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 2. 
Now, the actual pass to generate final JIT code */ ctx.image = (union loongarch_instruction *)image_ptr; + if (extable_size) + prog->aux->extable = (void *)image_ptr + prog_size; skip_init_ctx: ctx.idx = 0; + ctx.num_exentries = 0; build_prologue(&ctx); if (build_body(&ctx, extra_pass)) { @@ -1125,7 +1201,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* And we're done */ if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, 2, ctx.image); + bpf_jit_dump(prog->len, prog_size, 2, ctx.image); /* Update the icache */ flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx)); @@ -1147,7 +1223,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) jit_data->header = header; } prog->jited = 1; - prog->jited_len = image_size; + prog->jited_len = prog_size; prog->bpf_func = (void *)ctx.image; if (!prog->is_func || extra_pass) { diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h index e665ddb0aeb85..ca708024fdd3e 100644 --- a/arch/loongarch/net/bpf_jit.h +++ b/arch/loongarch/net/bpf_jit.h @@ -4,6 +4,7 @@ * * Copyright (C) 2022 Loongson Technology Corporation Limited */ +#include #include #include #include @@ -15,6 +16,7 @@ struct jit_ctx { unsigned int flags; unsigned int epilogue_offset; u32 *offset; + int num_exentries; union loongarch_instruction *image; u32 stack_size; }; -- GitLab From 61a6fccc0bd2e8030b2672a52ef3f6706b2b2ee4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 577/694] LoongArch: Add unaligned access support Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support unaligned access in hardware, while Loongson-3 series (Loongson-3A5000, Loongson-3C5000) are configurable whether support unaligned access in hardware. This patch add unaligned access emulation for those LoongArch processors without hardware support. Signed-off-by: Huacai Chen --- Documentation/admin-guide/sysctl/kernel.rst | 8 +- arch/loongarch/Kconfig | 2 + arch/loongarch/include/asm/inst.h | 14 + arch/loongarch/include/asm/thread_info.h | 2 +- arch/loongarch/kernel/Makefile | 3 +- arch/loongarch/kernel/traps.c | 27 ++ arch/loongarch/kernel/unaligned.c | 499 ++++++++++++++++++++ arch/loongarch/lib/Makefile | 2 +- arch/loongarch/lib/unaligned.S | 84 ++++ 9 files changed, 634 insertions(+), 7 deletions(-) create mode 100644 arch/loongarch/kernel/unaligned.c create mode 100644 arch/loongarch/lib/unaligned.S diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 98d1b198b2b4c..f2b802cd6208b 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -433,8 +433,8 @@ ignore-unaligned-usertrap On architectures where unaligned accesses cause traps, and where this feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``; -currently, ``arc`` and ``ia64``), controls whether all unaligned traps -are logged. +currently, ``arc``, ``ia64`` and ``loongarch``), controls whether all +unaligned traps are logged. = ============================================================= 0 Log all unaligned accesses. @@ -1457,8 +1457,8 @@ unaligned-trap On architectures where unaligned accesses cause traps, and where this feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW``; currently, -``arc`` and ``parisc``), controls whether unaligned traps are caught -and emulated (instead of failing). 
+``arc``, ``parisc`` and ``loongarch``), controls whether unaligned traps +are caught and emulated (instead of failing). = ======================================================== 0 Do not emulate unaligned accesses. diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 903096bd87f88..0daf6263655b3 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -121,6 +121,8 @@ config LOONGARCH select RTC_LIB select SMP select SPARSE_IRQ + select SYSCTL_ARCH_UNALIGN_ALLOW + select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_EXCEPTION_TRACE select SWIOTLB select TRACE_IRQFLAGS_SUPPORT diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index fce1843ceebb3..889d6c9fc2b6a 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -76,6 +76,10 @@ enum reg2i12_op { ldbu_op = 0xa8, ldhu_op = 0xa9, ldwu_op = 0xaa, + flds_op = 0xac, + fsts_op = 0xad, + fldd_op = 0xae, + fstd_op = 0xaf, }; enum reg2i14_op { @@ -146,6 +150,10 @@ enum reg3_op { ldxbu_op = 0x7040, ldxhu_op = 0x7048, ldxwu_op = 0x7050, + fldxs_op = 0x7060, + fldxd_op = 0x7068, + fstxs_op = 0x7070, + fstxd_op = 0x7078, amswapw_op = 0x70c0, amswapd_op = 0x70c1, amaddw_op = 0x70c2, @@ -566,4 +574,10 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \ DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op) +struct pt_regs; + +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc); +unsigned long unaligned_read(void __user *addr, void *value, unsigned long n, bool sign); +unsigned long unaligned_write(void __user *addr, unsigned long value, unsigned long n); + #endif /* _ASM_INST_H */ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index b7dd9f19a5a9c..1a3354ca056e9 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -38,7 +38,7 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ - .flags = 0, \ + .flags = _TIF_FIXADE, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ } diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 42be564278fa1..2ad2555b53eaf 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -7,7 +7,8 @@ extra-y := vmlinux.lds obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \ - elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o + elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \ + unaligned.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 1a4dce84ebc60..7ea62faeeadb5 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -368,13 +368,40 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs) irqentry_exit(regs, state); } +/* sysctl hooks */ +int unaligned_enabled __read_mostly = 1; /* Enabled by default */ +int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */ + asmlinkage void noinstr do_ale(struct pt_regs *regs) { + unsigned int *pc; irqentry_state_t state = irqentry_enter(regs); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); + + /* + * Did we catch a fault trying to load an instruction? 
+ */ + if (regs->csr_badvaddr == regs->csr_era) + goto sigbus; + if (user_mode(regs) && !test_thread_flag(TIF_FIXADE)) + goto sigbus; + if (!unaligned_enabled) + goto sigbus; + if (!no_unaligned_warning) + show_registers(regs); + + pc = (unsigned int *)exception_era(regs); + + emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc); + + goto out; + +sigbus: die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); +out: irqentry_exit(regs, state); } diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c new file mode 100644 index 0000000000000..bdff825d29ef4 --- /dev/null +++ b/arch/loongarch/kernel/unaligned.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle unaligned accesses by emulation. + * + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 1996, 1998, 1999, 2002 by Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) 2014 Imagination Technologies Ltd. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "access-helper.h" + +#ifdef CONFIG_DEBUG_FS +static u32 unaligned_instructions_user; +static u32 unaligned_instructions_kernel; +#endif + +static inline unsigned long read_fpr(unsigned int idx) +{ +#define READ_FPR(idx, __value) \ + __asm__ __volatile__("movfr2gr.d %0, $f"#idx"\n\t" : "=r"(__value)); + + unsigned long __value; + + switch (idx) { + case 0: + READ_FPR(0, __value); + break; + case 1: + READ_FPR(1, __value); + break; + case 2: + READ_FPR(2, __value); + break; + case 3: + READ_FPR(3, __value); + break; + case 4: + READ_FPR(4, __value); + break; + case 5: + READ_FPR(5, __value); + break; + case 6: + READ_FPR(6, __value); + break; + case 7: + READ_FPR(7, __value); + break; + case 8: + READ_FPR(8, __value); + break; + case 9: + READ_FPR(9, __value); + break; + case 10: + READ_FPR(10, __value); + break; + case 11: + READ_FPR(11, __value); + break; + case 12: + READ_FPR(12, __value); + break; + case 13: + READ_FPR(13, __value); + break; + case 14: + READ_FPR(14, __value); + break; + case 15: + READ_FPR(15, __value); + break; + case 16: + READ_FPR(16, __value); + break; + case 17: + READ_FPR(17, __value); + break; + case 18: + READ_FPR(18, __value); + break; + case 19: + READ_FPR(19, __value); + break; + case 20: + READ_FPR(20, __value); + break; + case 21: + READ_FPR(21, __value); + break; + case 22: + READ_FPR(22, __value); + break; + case 23: + READ_FPR(23, __value); + break; + case 24: + READ_FPR(24, __value); + break; + case 25: + READ_FPR(25, __value); + break; + case 26: + READ_FPR(26, __value); + break; + case 27: + READ_FPR(27, __value); + break; + case 28: + READ_FPR(28, __value); + break; + case 29: + READ_FPR(29, __value); + break; + case 30: + READ_FPR(30, __value); + break; + case 31: + READ_FPR(31, __value); + break; + default: + panic("unexpected idx '%d'", idx); + } +#undef READ_FPR + return __value; +} + +static inline void write_fpr(unsigned int idx, unsigned long value) +{ +#define WRITE_FPR(idx, value) \ + __asm__ __volatile__("movgr2fr.d $f"#idx", %0\n\t" :: "r"(value)); + + switch (idx) { + case 0: + WRITE_FPR(0, value); + break; + case 1: + WRITE_FPR(1, value); + break; + case 2: + WRITE_FPR(2, value); + break; + case 3: + WRITE_FPR(3, value); + break; + case 4: + WRITE_FPR(4, value); + break; + case 5: + WRITE_FPR(5, value); + break; + case 6: + WRITE_FPR(6, value); + break; + case 7: + WRITE_FPR(7, 
value); + break; + case 8: + WRITE_FPR(8, value); + break; + case 9: + WRITE_FPR(9, value); + break; + case 10: + WRITE_FPR(10, value); + break; + case 11: + WRITE_FPR(11, value); + break; + case 12: + WRITE_FPR(12, value); + break; + case 13: + WRITE_FPR(13, value); + break; + case 14: + WRITE_FPR(14, value); + break; + case 15: + WRITE_FPR(15, value); + break; + case 16: + WRITE_FPR(16, value); + break; + case 17: + WRITE_FPR(17, value); + break; + case 18: + WRITE_FPR(18, value); + break; + case 19: + WRITE_FPR(19, value); + break; + case 20: + WRITE_FPR(20, value); + break; + case 21: + WRITE_FPR(21, value); + break; + case 22: + WRITE_FPR(22, value); + break; + case 23: + WRITE_FPR(23, value); + break; + case 24: + WRITE_FPR(24, value); + break; + case 25: + WRITE_FPR(25, value); + break; + case 26: + WRITE_FPR(26, value); + break; + case 27: + WRITE_FPR(27, value); + break; + case 28: + WRITE_FPR(28, value); + break; + case 29: + WRITE_FPR(29, value); + break; + case 30: + WRITE_FPR(30, value); + break; + case 31: + WRITE_FPR(31, value); + break; + default: + panic("unexpected idx '%d'", idx); + } +#undef WRITE_FPR +} + +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc) +{ + bool fp = false; + bool sign, write; + bool user = user_mode(regs); + unsigned int res, size = 0; + unsigned long value = 0; + union loongarch_instruction insn; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + + __get_inst(&insn.word, pc, user); + + switch (insn.reg2i12_format.opcode) { + case ldh_op: + size = 2; + sign = true; + write = false; + break; + case ldhu_op: + size = 2; + sign = false; + write = false; + break; + case sth_op: + size = 2; + sign = true; + write = true; + break; + case ldw_op: + size = 4; + sign = true; + write = false; + break; + case ldwu_op: + size = 4; + sign = false; + write = false; + break; + case stw_op: + size = 4; + sign = true; + write = true; + break; + case ldd_op: + size = 8; + sign = true; + write = false; + break; + case std_op: + size = 8; + sign = true; + write = true; + break; + case flds_op: + size = 4; + fp = true; + sign = true; + write = false; + break; + case fsts_op: + size = 4; + fp = true; + sign = true; + write = true; + break; + case fldd_op: + size = 8; + fp = true; + sign = true; + write = false; + break; + case fstd_op: + size = 8; + fp = true; + sign = true; + write = true; + break; + } + + switch (insn.reg2i14_format.opcode) { + case ldptrw_op: + size = 4; + sign = true; + write = false; + break; + case stptrw_op: + size = 4; + sign = true; + write = true; + break; + case ldptrd_op: + size = 8; + sign = true; + write = false; + break; + case stptrd_op: + size = 8; + sign = true; + write = true; + break; + } + + switch (insn.reg3_format.opcode) { + case ldxh_op: + size = 2; + sign = true; + write = false; + break; + case ldxhu_op: + size = 2; + sign = false; + write = false; + break; + case stxh_op: + size = 2; + sign = true; + write = true; + break; + case ldxw_op: + size = 4; + sign = true; + write = false; + break; + case ldxwu_op: + size = 4; + sign = false; + write = false; + break; + case stxw_op: + size = 4; + sign = true; + write = true; + break; + case ldxd_op: + size = 8; + sign = true; + write = false; + break; + case stxd_op: + size = 8; + sign = true; + write = true; + break; + case fldxs_op: + size = 4; + fp = true; + sign = true; + write = false; + break; + case fstxs_op: + size = 4; + fp = true; + sign = true; + write = true; + break; + case fldxd_op: + size = 8; + fp = true; + sign = 
true; + write = false; + break; + case fstxd_op: + size = 8; + fp = true; + sign = true; + write = true; + break; + } + + if (!size) + goto sigbus; + if (user && !access_ok(addr, size)) + goto sigbus; + + if (!write) { + res = unaligned_read(addr, &value, size, sign); + if (res) + goto fault; + + /* Rd is the same field in any formats */ + if (!fp) + regs->regs[insn.reg3_format.rd] = value; + else { + if (is_fpu_owner()) + write_fpr(insn.reg3_format.rd, value); + else + set_fpr64(¤t->thread.fpu.fpr[insn.reg3_format.rd], 0, value); + } + } else { + /* Rd is the same field in any formats */ + if (!fp) + value = regs->regs[insn.reg3_format.rd]; + else { + if (is_fpu_owner()) + value = read_fpr(insn.reg3_format.rd); + else + value = get_fpr64(¤t->thread.fpu.fpr[insn.reg3_format.rd], 0); + } + + res = unaligned_write(addr, value, size); + if (res) + goto fault; + } + +#ifdef CONFIG_DEBUG_FS + if (user) + unaligned_instructions_user++; + else + unaligned_instructions_kernel++; +#endif + + compute_return_era(regs); + + return; + +fault: + /* Did we have an exception handler installed? */ + if (fixup_exception(regs)) + return; + + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGSEGV); + + return; + +sigbus: + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGBUS); + + return; +} + +#ifdef CONFIG_DEBUG_FS +static int __init debugfs_unaligned(void) +{ + struct dentry *d; + + d = debugfs_create_dir("loongarch", NULL); + if (!d) + return -ENOMEM; + + debugfs_create_u32("unaligned_instructions_user", + S_IRUGO, d, &unaligned_instructions_user); + debugfs_create_u32("unaligned_instructions_kernel", + S_IRUGO, d, &unaligned_instructions_kernel); + + return 0; +} +arch_initcall(debugfs_unaligned); +#endif diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index e36635fccb695..8678955303405 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -3,4 +3,4 @@ # Makefile for LoongArch-specific library files. 
# -lib-y += delay.o clear_user.o copy_user.o dump_tlb.o +lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S new file mode 100644 index 0000000000000..9177fd638f072 --- /dev/null +++ b/arch/loongarch/lib/unaligned.S @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include + +#include +#include +#include +#include +#include +#include + +.L_fixup_handle_unaligned: + li.w a0, -EFAULT + jr ra + +/* + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign) + * + * a0: addr + * a1: value + * a2: n + * a3: sign + */ +SYM_FUNC_START(unaligned_read) + beqz a2, 5f + + li.w t2, 0 + addi.d t0, a2, -1 + slli.d t1, t0, 3 + add.d a0, a0, t0 + + beqz a3, 2f +1: ld.b t3, a0, 0 + b 3f + +2: ld.bu t3, a0, 0 +3: sll.d t3, t3, t1 + or t2, t2, t3 + addi.d t1, t1, -8 + addi.d a0, a0, -1 + addi.d a2, a2, -1 + bgtz a2, 2b +4: st.d t2, a1, 0 + + move a0, a2 + jr ra + +5: li.w a0, -EFAULT + jr ra + + _asm_extable 1b, .L_fixup_handle_unaligned + _asm_extable 2b, .L_fixup_handle_unaligned + _asm_extable 4b, .L_fixup_handle_unaligned +SYM_FUNC_END(unaligned_read) + +/* + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n) + * + * a0: addr + * a1: value + * a2: n + */ +SYM_FUNC_START(unaligned_write) + beqz a2, 3f + + li.w t0, 0 +1: srl.d t1, a1, t0 +2: st.b t1, a0, 0 + addi.d t0, t0, 8 + addi.d a2, a2, -1 + addi.d a0, a0, 1 + bgtz a2, 1b + + move a0, a2 + jr ra + +3: li.w a0, -EFAULT + jr ra + + _asm_extable 2b, .L_fixup_handle_unaligned +SYM_FUNC_END(unaligned_write) -- GitLab From 19e5eb15b00c5841b4b9bd9777af2865a40d2f39 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:39:59 +0800 Subject: [PATCH 578/694] LoongArch: Add alternative runtime patching mechanism Introduce the "alternative" mechanism from ARM64 and x86 for LoongArch to apply runtime patching. The main purpose of this patch is to provide a framework. In future we can use this mechanism (i.e., the ALTERNATIVE and ALTERNATIVE_2 macros) to optimize hotspot functions according to cpu features. Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/alternative-asm.h | 82 +++++++ arch/loongarch/include/asm/alternative.h | 111 +++++++++ arch/loongarch/include/asm/bugs.h | 15 ++ arch/loongarch/include/asm/inst.h | 18 ++ arch/loongarch/kernel/Makefile | 2 +- arch/loongarch/kernel/alternative.c | 246 +++++++++++++++++++ arch/loongarch/kernel/module.c | 15 ++ arch/loongarch/kernel/setup.c | 7 + arch/loongarch/kernel/vmlinux.lds.S | 12 + 9 files changed, 507 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/alternative-asm.h create mode 100644 arch/loongarch/include/asm/alternative.h create mode 100644 arch/loongarch/include/asm/bugs.h create mode 100644 arch/loongarch/kernel/alternative.c diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h new file mode 100644 index 0000000000000..ff3d10ac393f2 --- /dev/null +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_ASM_H +#define _ASM_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +#include + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). 
This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .short \feature + .byte \orig_len + .byte \alt_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".fill" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140 : + \oldinstr +141 : + .fill - (((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature, 142b-140b, 144f-143f + .popsection + + .subsection 1 +143 : + \newinstr +144 : + .previous +.endm + +#define old_len (141b-140b) +#define new_len1 (144f-143f) +#define new_len2 (145f-144f) + +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140 : + \oldinstr +141 : + .fill - ((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature1, 142b-140b, 144f-143f, 142b-141b + altinstruction_entry 140b, 144f, \feature2, 142b-140b, 145f-144f, 142b-141b + .popsection + + .subsection 1 +143 : + \newinstr1 +144 : + \newinstr2 +145 : + .previous +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h new file mode 100644 index 0000000000000..cee7b29785ab2 --- /dev/null +++ b/arch/loongarch/include/asm/alternative.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_H +#define _ASM_ALTERNATIVE_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +struct alt_instr { + s32 instr_offset; /* offset to original instruction */ + s32 replace_offset; /* offset to replacement instruction */ + u16 feature; /* feature bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +/* + * Debug flag that can be tested to see whether alternative + * instructions were patched in already: + */ +extern int alternatives_patched; +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +#define b_replacement(num) "664"#num +#define e_replacement(num) "665"#num + +#define alt_end_marker "663" +#define alt_slen "662b-661b" +#define alt_total_slen alt_end_marker"b-661b" +#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" + +#define __OLDINSTR(oldinstr, num) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen(num) ")-(" alt_slen ")) / 4, 4, 0x03400000\n" + +#define OLDINSTR(oldinstr, num) \ + __OLDINSTR(oldinstr, num) \ + alt_end_marker ":\n" + +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" 
a ") < (" b ")))))" + +/* + * Pad the second replacement alternative with additional NOPs if it is + * additionally longer than the first replacement alternative. + */ +#define OLDINSTR_2(oldinstr, num1, num2) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) / 4, " \ + "4, 0x03400000\n" \ + alt_end_marker ":\n" + +#define ALTINSTR_ENTRY(feature, num) \ + " .long 661b - .\n" /* label */ \ + " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + " .short " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_total_slen "\n" /* source len */ \ + " .byte " alt_rlen(num) "\n" /* replacement len */ + +#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ + b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + OLDINSTR(oldinstr, 1) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature, 1) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + ".previous\n" + +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR_2(oldinstr, 1, 2) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ".previous\n" + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + (asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")) + +#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ + (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/loongarch/include/asm/bugs.h b/arch/loongarch/include/asm/bugs.h new file mode 100644 index 0000000000000..98396535163b3 --- /dev/null +++ b/arch/loongarch/include/asm/bugs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is included by init/main.c to check for architecture-dependent bugs. 
+ * + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#ifndef _ASM_BUGS_H +#define _ASM_BUGS_H + +#include +#include + +extern void check_bugs(void); + +#endif /* _ASM_BUGS_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 889d6c9fc2b6a..67215af47b3d5 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -8,6 +8,7 @@ #include #include +#define INSN_NOP 0x03400000 #define INSN_BREAK 0x002a0000 #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 @@ -28,6 +29,7 @@ enum reg0i26_op { enum reg1i20_op { lu12iw_op = 0x0a, lu32id_op = 0x0b, + pcaddi_op = 0x0c, pcaddu12i_op = 0x0e, pcaddu18i_op = 0x0f, }; @@ -35,6 +37,8 @@ enum reg1i20_op { enum reg1i21_op { beqz_op = 0x10, bnez_op = 0x11, + bceqz_op = 0x12, /* bits[9:8] = 0x00 */ + bcnez_op = 0x12, /* bits[9:8] = 0x01 */ }; enum reg2_op { @@ -315,6 +319,12 @@ static inline bool is_imm_negative(unsigned long val, unsigned int bit) return val & (1UL << (bit - 1)); } +static inline bool is_pc_ins(union loongarch_instruction *ip) +{ + return ip->reg1i20_format.opcode >= pcaddi_op && + ip->reg1i20_format.opcode <= pcaddu18i_op; +} + static inline bool is_branch_ins(union loongarch_instruction *ip) { return ip->reg1i21_format.opcode >= beqz_op && @@ -353,6 +363,14 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit) return val < (1UL << bit); } +static inline unsigned long sign_extend(unsigned long val, unsigned int idx) +{ + if (!is_imm_negative(val, idx + 1)) + return ((1UL << idx) - 1) & val; + else + return ~((1UL << idx) - 1) | val; +} + #define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ int offset) \ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 2ad2555b53eaf..86744531b100a 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -8,7 +8,7 @@ extra-y := vmlinux.lds obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \ elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \ - unaligned.o + alternative.o unaligned.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c new file mode 100644 index 0000000000000..c5aebeac960b6 --- /dev/null +++ b/arch/loongarch/kernel/alternative.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include + +int __read_mostly alternatives_patched; + +EXPORT_SYMBOL_GPL(alternatives_patched); + +#define MAX_PATCH_SIZE (((u8)(-1)) / LOONGARCH_INSN_SIZE) + +static int __initdata_or_module debug_alternative; + +static int __init debug_alt(char *str) +{ + debug_alternative = 1; + return 1; +} +__setup("debug-alternative", debug_alt); + +#define DPRINTK(fmt, args...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ +} while (0) + +#define DUMP_WORDS(buf, count, fmt, args...) \ +do { \ + if (unlikely(debug_alternative)) { \ + int _j; \ + union loongarch_instruction *_buf = buf; \ + \ + if (!(count)) \ + break; \ + \ + printk(KERN_DEBUG fmt, ##args); \ + for (_j = 0; _j < count - 1; _j++) \ + printk(KERN_CONT "<%08x> ", _buf[_j].word); \ + printk(KERN_CONT "<%08x>\n", _buf[_j].word); \ + } \ +} while (0) + +/* Use this to add nops to a buffer, then text_poke the whole buffer. 
*/ +static void __init_or_module add_nops(union loongarch_instruction *insn, int count) +{ + while (count--) { + insn->word = INSN_NOP; + insn++; + } +} + +/* Is the jump addr in local .altinstructions */ +static inline bool in_alt_jump(unsigned long jump, void *start, void *end) +{ + return jump >= (unsigned long)start && jump < (unsigned long)end; +} + +static void __init_or_module recompute_jump(union loongarch_instruction *buf, + union loongarch_instruction *dest, union loongarch_instruction *src, + void *start, void *end) +{ + unsigned int si, si_l, si_h; + unsigned long cur_pc, jump_addr, pc; + long offset; + + cur_pc = (unsigned long)src; + pc = (unsigned long)dest; + + si_l = src->reg0i26_format.immediate_l; + si_h = src->reg0i26_format.immediate_h; + switch (src->reg0i26_format.opcode) { + case b_op: + case bl_op: + jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 27); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + offset >>= 2; + buf->reg0i26_format.immediate_h = offset >> 16; + buf->reg0i26_format.immediate_l = offset; + return; + } + + si_l = src->reg1i21_format.immediate_l; + si_h = src->reg1i21_format.immediate_h; + switch (src->reg1i21_format.opcode) { + case bceqz_op: /* bceqz_op = bcnez_op */ + BUG_ON(buf->reg1i21_format.rj & BIT(4)); + fallthrough; + case beqz_op: + case bnez_op: + jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 22); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_4M || offset >= SZ_4M); + offset >>= 2; + buf->reg1i21_format.immediate_h = offset >> 16; + buf->reg1i21_format.immediate_l = offset; + return; + } + + si = src->reg2i16_format.immediate; + switch (src->reg2i16_format.opcode) { + case beq_op: + case bne_op: + case blt_op: + case bge_op: + case bltu_op: + case bgeu_op: + jump_addr = cur_pc + sign_extend(si << 2, 17); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_128K || offset >= SZ_128K); + offset >>= 2; + buf->reg2i16_format.immediate = offset; + return; + } +} + +static int __init_or_module copy_alt_insns(union loongarch_instruction *buf, + union loongarch_instruction *dest, union loongarch_instruction *src, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + buf[i].word = src[i].word; + + if (is_pc_ins(&src[i])) { + pr_err("Not support pcrel instruction at present!"); + return -EINVAL; + } + + if (is_branch_ins(&src[i]) && + src[i].reg2i16_format.opcode != jirl_op) { + recompute_jump(&buf[i], &dest[i], &src[i], src, src + nr); + } + } + + return 0; +} + +/* + * text_poke_early - Update instructions on a live kernel at boot time + * + * When you use this code to patch more than one byte of an instruction + * you need to make sure that other CPUs cannot execute this code in parallel. + * Also no thread must be currently preempted in the middle of these + * instructions. And on the local CPU you need to be protected again NMI or MCE + * handlers seeing an inconsistent instruction while you patch. 
+ */ +static void *__init_or_module text_poke_early(union loongarch_instruction *insn, + union loongarch_instruction *buf, unsigned int nr) +{ + int i; + unsigned long flags; + + local_irq_save(flags); + + for (i = 0; i < nr; i++) + insn[i].word = buf[i].word; + + local_irq_restore(flags); + + wbflush(); + flush_icache_range((unsigned long)insn, (unsigned long)(insn + nr)); + + return insn; +} + +/* + * Replace instructions with better alternatives for this CPU type. This runs + * before SMP is initialized to avoid SMP problems with self modifying code. + * This implies that asymmetric systems where APs have less capabilities than + * the boot processor are not handled. Tough. Make sure you disable such + * features by hand. + */ +void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + unsigned int nr_instr, nr_repl, nr_insnbuf; + union loongarch_instruction *instr, *replacement; + union loongarch_instruction insnbuf[MAX_PATCH_SIZE]; + + DPRINTK("alt table %px, -> %px", start, end); + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite previously scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ + for (a = start; a < end; a++) { + nr_insnbuf = 0; + + instr = (void *)&a->instr_offset + a->instr_offset; + replacement = (void *)&a->replace_offset + a->replace_offset; + + BUG_ON(a->instrlen > sizeof(insnbuf)); + BUG_ON(a->instrlen & 0x3); + BUG_ON(a->replacementlen & 0x3); + + nr_instr = a->instrlen / LOONGARCH_INSN_SIZE; + nr_repl = a->replacementlen / LOONGARCH_INSN_SIZE; + + if (!cpu_has(a->feature)) { + DPRINTK("feat not exist: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + continue; + } + + DPRINTK("feat: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + DUMP_WORDS(instr, nr_instr, "%px: old_insn: ", instr); + DUMP_WORDS(replacement, nr_repl, "%px: rpl_insn: ", replacement); + + copy_alt_insns(insnbuf, instr, replacement, nr_repl); + nr_insnbuf = nr_repl; + + if (nr_instr > nr_repl) { + add_nops(insnbuf + nr_repl, nr_instr - nr_repl); + nr_insnbuf += nr_instr - nr_repl; + } + DUMP_WORDS(insnbuf, nr_insnbuf, "%px: final_insn: ", instr); + + text_poke_early(instr, insnbuf, nr_insnbuf); + } +} + +void __init alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + alternatives_patched = 1; +} diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 097595b2fc14b..825fcf77f9e73 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -17,6 +17,7 @@ #include #include #include +#include static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { @@ -456,3 +457,17 @@ void *module_alloc(unsigned long size) return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *mod) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (!strcmp(".altinstructions", secstrs + s->sh_name)) + 
apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size);
+	}
+
+	return 0;
+}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index ae436def7ee98..53831bcb11ca3 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -31,7 +31,9 @@
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
@@ -80,6 +82,11 @@ const char *get_system_type(void)
 	return "generic-loongson-machine";
 }
 
+void __init check_bugs(void)
+{
+	alternative_instructions();
+}
+
 static const char *dmi_string_parse(const struct dmi_header *dm, u8 s)
 {
 	const u8 *bp = ((u8 *) dm) + dm->length;
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index efecda0c23616..733b16e8d55dd 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -54,6 +54,18 @@ SECTIONS
 	. = ALIGN(PECOFF_SEGMENT_ALIGN);
 	_etext = .;
 
+	/*
+	 * struct alt_instr entries. From the header (alternative.h):
+	 * "Alternative instructions for different CPU types or capabilities"
+	 * Think locking instructions on spinlocks.
+	 */
+	. = ALIGN(4);
+	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+
 	.got : ALIGN(16) { *(.got) }
 	.plt : ALIGN(16) { *(.plt) }
 	.got.plt : ALIGN(16) { *(.got.plt) }
--
GitLab

From a275a82dcd4024c75337db15d59ed039c31e21da Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Sat, 10 Dec 2022 22:39:59 +0800
Subject: [PATCH 579/694] LoongArch: Use alternative to optimize libraries

Use the alternative mechanism to optimize common libraries according to
whether the CPU has the UAL (hardware unaligned access support) feature,
including memset(), memcpy(), memmove(), copy_user() and clear_user().
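Conceptually, each patched entry point added by this patch is a feature dispatch resolved once at boot: the ALTERNATIVE trampoline starts out as a branch to the generic byte-at-a-time routine, and apply_alternatives() rewrites it into a branch to the UAL-optimized body when CPU_FEATURE_UAL is present. The C fragment below is only an editor's sketch of that selection (memcpy_dispatch and the extern declarations are invented stand-ins, not symbols from this patch); the real dispatch is done in assembly and leaves no runtime test behind.

    #include <stddef.h>
    #include <stdbool.h>

    /* Stand-ins for the asm routines added in memcpy.S further down. */
    void *__memcpy_generic(void *dst, const void *src, size_t n);
    void *__memcpy_fast(void *dst, const void *src, size_t n);

    /* Hypothetical flag; the kernel reads the UAL bit from CPUCFG at boot. */
    extern bool cpu_has_ual;

    /*
     * What the "ALTERNATIVE b __memcpy_generic / b __memcpy_fast" entry point
     * amounts to, written as plain C: use the unaligned-capable copy when the
     * CPU supports it.  After boot-time patching the test itself is gone.
     */
    void *memcpy_dispatch(void *dst, const void *src, size_t n)
    {
    	return cpu_has_ual ? __memcpy_fast(dst, src, n)
    			   : __memcpy_generic(dst, src, n);
    }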
We have tested UnixBench on a Loongson-3A5000 quad-core machine (1.6GHz): 1, One copy, before patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 9566582.0 819.8 Double-Precision Whetstone 55.0 2805.3 510.1 Execl Throughput 43.0 2120.0 493.0 File Copy 1024 bufsize 2000 maxblocks 3960.0 209833.0 529.9 File Copy 256 bufsize 500 maxblocks 1655.0 89400.0 540.2 File Copy 4096 bufsize 8000 maxblocks 5800.0 320036.0 551.8 Pipe Throughput 12440.0 340624.0 273.8 Pipe-based Context Switching 4000.0 109939.1 274.8 Process Creation 126.0 4728.7 375.3 Shell Scripts (1 concurrent) 42.4 2223.1 524.3 Shell Scripts (8 concurrent) 6.0 883.1 1471.9 System Call Overhead 15000.0 518639.1 345.8 ======== System Benchmarks Index Score 500.2 2, One copy, after patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 9567674.7 819.9 Double-Precision Whetstone 55.0 2805.5 510.1 Execl Throughput 43.0 2392.7 556.4 File Copy 1024 bufsize 2000 maxblocks 3960.0 417804.0 1055.1 File Copy 256 bufsize 500 maxblocks 1655.0 112909.5 682.2 File Copy 4096 bufsize 8000 maxblocks 5800.0 1255207.4 2164.2 Pipe Throughput 12440.0 555712.0 446.7 Pipe-based Context Switching 4000.0 99964.5 249.9 Process Creation 126.0 5192.5 412.1 Shell Scripts (1 concurrent) 42.4 2302.4 543.0 Shell Scripts (8 concurrent) 6.0 919.6 1532.6 System Call Overhead 15000.0 511159.3 340.8 ======== System Benchmarks Index Score 640.1 3, Four copies, before patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 38268610.5 3279.2 Double-Precision Whetstone 55.0 11222.2 2040.4 Execl Throughput 43.0 7892.0 1835.3 File Copy 1024 bufsize 2000 maxblocks 3960.0 235149.6 593.8 File Copy 256 bufsize 500 maxblocks 1655.0 74959.6 452.9 File Copy 4096 bufsize 8000 maxblocks 5800.0 545048.5 939.7 Pipe Throughput 12440.0 1337359.0 1075.0 Pipe-based Context Switching 4000.0 473663.9 1184.2 Process Creation 126.0 17491.2 1388.2 Shell Scripts (1 concurrent) 42.4 6865.7 1619.3 Shell Scripts (8 concurrent) 6.0 1015.9 1693.1 System Call Overhead 15000.0 1899535.2 1266.4 ======== System Benchmarks Index Score 1278.3 4, Four copies, after patch: System Benchmarks Index Values BASELINE RESULT INDEX Dhrystone 2 using register variables 116700.0 38272815.5 3279.6 Double-Precision Whetstone 55.0 11222.8 2040.5 Execl Throughput 43.0 8839.2 2055.6 File Copy 1024 bufsize 2000 maxblocks 3960.0 313912.9 792.7 File Copy 256 bufsize 500 maxblocks 1655.0 80976.1 489.3 File Copy 4096 bufsize 8000 maxblocks 5800.0 1176594.3 2028.6 Pipe Throughput 12440.0 2100941.9 1688.9 Pipe-based Context Switching 4000.0 476696.4 1191.7 Process Creation 126.0 18394.7 1459.9 Shell Scripts (1 concurrent) 42.4 7172.2 1691.6 Shell Scripts (8 concurrent) 6.0 1058.3 1763.9 System Call Overhead 15000.0 1874714.7 1249.8 ======== System Benchmarks Index Score 1488.8 Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/string.h | 5 ++ arch/loongarch/lib/Makefile | 3 +- arch/loongarch/lib/clear_user.S | 70 ++++++++++++++-- arch/loongarch/lib/copy_user.S | 91 +++++++++++++++++++-- arch/loongarch/lib/memcpy.S | 95 ++++++++++++++++++++++ arch/loongarch/lib/memmove.S | 121 ++++++++++++++++++++++++++++ arch/loongarch/lib/memset.S | 91 +++++++++++++++++++++ 7 files changed, 465 insertions(+), 11 deletions(-) create mode 100644 arch/loongarch/lib/memcpy.S create mode 100644 arch/loongarch/lib/memmove.S create mode 100644 arch/loongarch/lib/memset.S 
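Before the diffs, one detail worth spelling out: the new memset.S broadcasts the fill byte into a full 64-bit register (the fill_to_64 macro) so that the fast path can store eight bytes per st.d. A minimal C equivalent of that broadcast, given here purely as an illustration (fill_byte_to_u64 is a hypothetical helper name, not part of the patch):

    #include <stdint.h>

    /* Replicate the low byte of c into all eight bytes, mirroring what the
     * fill_to_64 macro does with bstrins.d in memset.S below. */
    static inline uint64_t fill_byte_to_u64(uint64_t c)
    {
    	c &= 0xff;	/* keep only the fill byte */
    	c |= c << 8;	/* 0x2a       -> 0x2a2a */
    	c |= c << 16;	/* 0x2a2a     -> 0x2a2a2a2a */
    	c |= c << 32;	/* 0x2a2a2a2a -> 0x2a2a2a2a2a2a2a2a */
    	return c;
    }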
diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h index b07e60ded9577..7b29cc9c70aa6 100644 --- a/arch/loongarch/include/asm/string.h +++ b/arch/loongarch/include/asm/string.h @@ -5,8 +5,13 @@ #ifndef _ASM_STRING_H #define _ASM_STRING_H +#define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); + +#define __HAVE_ARCH_MEMCPY extern void *memcpy(void *__to, __const__ void *__from, size_t __n); + +#define __HAVE_ARCH_MEMMOVE extern void *memmove(void *__dest, __const__ void *__src, size_t __n); #endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index 8678955303405..40bde632900fc 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -3,4 +3,5 @@ # Makefile for LoongArch-specific library files. # -lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o +lib-y += delay.o memset.o memcpy.o memmove.o \ + clear_user.o copy_user.o dump_tlb.o unaligned.o diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index d5c9e44ac8c4f..2dc48e61a2c8c 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -3,25 +3,37 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include #include #include +#include #include #include -.irp to, 0 +.irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): addi.d a0, a1, (\to) * (-8) jr ra .endr +SYM_FUNC_START(__clear_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __clear_user_generic", \ + "b __clear_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__clear_user) + +EXPORT_SYMBOL(__clear_user) + /* - * unsigned long __clear_user(void *addr, size_t size) + * unsigned long __clear_user_generic(void *addr, size_t size) * * a0: addr * a1: size */ -SYM_FUNC_START(__clear_user) +SYM_FUNC_START(__clear_user_generic) beqz a1, 2f 1: st.b zero, a0, 0 @@ -33,6 +45,54 @@ SYM_FUNC_START(__clear_user) jr ra _asm_extable 1b, .L_fixup_handle_0 -SYM_FUNC_END(__clear_user) +SYM_FUNC_END(__clear_user_generic) -EXPORT_SYMBOL(__clear_user) +/* + * unsigned long __clear_user_fast(void *addr, unsigned long size) + * + * a0: addr + * a1: size + */ +SYM_FUNC_START(__clear_user_fast) + beqz a1, 10f + + ori a2, zero, 64 + blt a1, a2, 9f + + /* set 64 bytes at a time */ +1: st.d zero, a0, 0 +2: st.d zero, a0, 8 +3: st.d zero, a0, 16 +4: st.d zero, a0, 24 +5: st.d zero, a0, 32 +6: st.d zero, a0, 40 +7: st.d zero, a0, 48 +8: st.d zero, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, -64 + bge a1, a2, 1b + + beqz a1, 10f + + /* set the remaining bytes */ +9: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 9b + + /* return */ +10: move a0, a1 + jr ra + + /* fixup and ex_table */ + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_1 + _asm_extable 3b, .L_fixup_handle_2 + _asm_extable 4b, .L_fixup_handle_3 + _asm_extable 5b, .L_fixup_handle_4 + _asm_extable 6b, .L_fixup_handle_5 + _asm_extable 7b, .L_fixup_handle_6 + _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 9b, .L_fixup_handle_0 +SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index 61933d964da03..55ac6020a1ad1 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -3,26 +3,38 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include #include #include #include +#include #include #include -.irp to, 0 +.irp to, 0, 1, 2, 3, 4, 5, 6, 7 
.L_fixup_handle_\to\(): addi.d a0, a2, (\to) * (-8) jr ra .endr +SYM_FUNC_START(__copy_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __copy_user_generic", \ + "b __copy_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__copy_user) + +EXPORT_SYMBOL(__copy_user) + /* - * unsigned long __copy_user(void *to, const void *from, size_t n) + * unsigned long __copy_user_generic(void *to, const void *from, size_t n) * * a0: to * a1: from * a2: n */ -SYM_FUNC_START(__copy_user) +SYM_FUNC_START(__copy_user_generic) beqz a2, 3f 1: ld.b t0, a1, 0 @@ -37,6 +49,75 @@ SYM_FUNC_START(__copy_user) _asm_extable 1b, .L_fixup_handle_0 _asm_extable 2b, .L_fixup_handle_0 -SYM_FUNC_END(__copy_user) +SYM_FUNC_END(__copy_user_generic) -EXPORT_SYMBOL(__copy_user) +/* + * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n) + * + * a0: to + * a1: from + * a2: n + */ +SYM_FUNC_START(__copy_user_fast) + beqz a2, 19f + + ori a3, zero, 64 + blt a2, a3, 17f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 +2: ld.d t1, a1, 8 +3: ld.d t2, a1, 16 +4: ld.d t3, a1, 24 +5: ld.d t4, a1, 32 +6: ld.d t5, a1, 40 +7: ld.d t6, a1, 48 +8: ld.d t7, a1, 56 +9: st.d t0, a0, 0 +10: st.d t1, a0, 8 +11: st.d t2, a0, 16 +12: st.d t3, a0, 24 +13: st.d t4, a0, 32 +14: st.d t5, a0, 40 +15: st.d t6, a0, 48 +16: st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a3, 1b + + beqz a2, 19f + + /* copy the remaining bytes */ +17: ld.b t0, a1, 0 +18: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 17b + + /* return */ +19: move a0, a2 + jr ra + + /* fixup and ex_table */ + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_1 + _asm_extable 3b, .L_fixup_handle_2 + _asm_extable 4b, .L_fixup_handle_3 + _asm_extable 5b, .L_fixup_handle_4 + _asm_extable 6b, .L_fixup_handle_5 + _asm_extable 7b, .L_fixup_handle_6 + _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 9b, .L_fixup_handle_0 + _asm_extable 10b, .L_fixup_handle_1 + _asm_extable 11b, .L_fixup_handle_2 + _asm_extable 12b, .L_fixup_handle_3 + _asm_extable 13b, .L_fixup_handle_4 + _asm_extable 14b, .L_fixup_handle_5 + _asm_extable 15b, .L_fixup_handle_6 + _asm_extable 16b, .L_fixup_handle_7 + _asm_extable 17b, .L_fixup_handle_0 + _asm_extable 18b, .L_fixup_handle_0 +SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S new file mode 100644 index 0000000000000..7c07d595ee89a --- /dev/null +++ b/arch/loongarch/lib/memcpy.S @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +SYM_FUNC_START(memcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memcpy_generic", \ + "b __memcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memcpy) + +EXPORT_SYMBOL(memcpy) + +/* + * void *__memcpy_generic(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__memcpy_generic) + move a3, a0 + beqz a2, 2f + +1: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(__memcpy_generic) + +/* + * void *__memcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__memcpy_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* copy 64 bytes at a time */ +1: ld.d t0, 
a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + ld.d t4, a1, 32 + ld.d t5, a1, 40 + ld.d t6, a1, 48 + ld.d t7, a1, 56 + st.d t0, a0, 0 + st.d t1, a0, 8 + st.d t2, a0, 16 + st.d t3, a0, 24 + st.d t4, a0, 32 + st.d t5, a0, 40 + st.d t6, a0, 48 + st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__memcpy_fast) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S new file mode 100644 index 0000000000000..6ffdb46da78fd --- /dev/null +++ b/arch/loongarch/lib/memmove.S @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +SYM_FUNC_START(memmove) + blt a0, a1, 1f /* dst < src, memcpy */ + blt a1, a0, 3f /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ + + /* if (src - dst) < 64, copy 1 byte at a time */ +1: ori a3, zero, 64 + sub.d t0, a1, a0 + blt t0, a3, 2f + b memcpy +2: b __memcpy_generic + + /* if (dst - src) < 64, copy 1 byte at a time */ +3: ori a3, zero, 64 + sub.d t0, a0, a1 + blt t0, a3, 4f + b rmemcpy +4: b __rmemcpy_generic +SYM_FUNC_END(memmove) + +EXPORT_SYMBOL(memmove) + +SYM_FUNC_START(rmemcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __rmemcpy_generic", \ + "b __rmemcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(rmemcpy) + +/* + * void *__rmemcpy_generic(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__rmemcpy_generic) + move a3, a0 + beqz a2, 2f + + add.d a0, a0, a2 + add.d a1, a1, a2 + +1: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(__rmemcpy_generic) + +/* + * void *__rmemcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__rmemcpy_fast) + move a3, a0 + beqz a2, 3f + + add.d a0, a0, a2 + add.d a1, a1, a2 + + ori a4, zero, 64 + blt a2, a4, 2f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, -8 + ld.d t1, a1, -16 + ld.d t2, a1, -24 + ld.d t3, a1, -32 + ld.d t4, a1, -40 + ld.d t5, a1, -48 + ld.d t6, a1, -56 + ld.d t7, a1, -64 + st.d t0, a0, -8 + st.d t1, a0, -16 + st.d t2, a0, -24 + st.d t3, a0, -32 + st.d t4, a0, -40 + st.d t5, a0, -48 + st.d t6, a0, -56 + st.d t7, a0, -64 + + addi.d a0, a0, -64 + addi.d a1, a1, -64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__rmemcpy_fast) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S new file mode 100644 index 0000000000000..e7cb4ea3747d7 --- /dev/null +++ b/arch/loongarch/lib/memset.S @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include + +.macro fill_to_64 r0 + bstrins.d \r0, \r0, 15, 8 + bstrins.d \r0, \r0, 31, 16 + bstrins.d \r0, \r0, 63, 32 +.endm + +SYM_FUNC_START(memset) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memset_generic", \ + "b 
__memset_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memset) + +EXPORT_SYMBOL(memset) + +/* + * void *__memset_generic(void *s, int c, size_t n) + * + * a0: s + * a1: c + * a2: n + */ +SYM_FUNC_START(__memset_generic) + move a3, a0 + beqz a2, 2f + +1: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(__memset_generic) + +/* + * void *__memset_fast(void *s, int c, size_t n) + * + * a0: s + * a1: c + * a2: n + */ +SYM_FUNC_START(__memset_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* fill a1 to 64 bits */ + fill_to_64 a1 + + /* set 64 bytes at a time */ +1: st.d a1, a0, 0 + st.d a1, a0, 8 + st.d a1, a0, 16 + st.d a1, a0, 24 + st.d a1, a0, 32 + st.d a1, a0, 40 + st.d a1, a0, 48 + st.d a1, a0, 56 + + addi.d a0, a0, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* set the remaining bytes */ +2: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__memset_fast) -- GitLab From 88d4d957edc707e037449ef71a58c6530a39d01e Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Sat, 10 Dec 2022 22:40:05 +0800 Subject: [PATCH 580/694] LoongArch: Add FDT booting support from efi system table Since commit 40cd01a9c324("efi/loongarch: libstub: remove dependency on flattened DT"), we can parse the FDT from efi system table. And now, LoongArch is coming to support booting with FDT, so we add the relevant booting support as well as parameter parsing. Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 + arch/loongarch/include/asm/efi.h | 1 + arch/loongarch/include/asm/setup.h | 1 + arch/loongarch/kernel/acpi.c | 11 +++++- arch/loongarch/kernel/efi.c | 15 +++++++- arch/loongarch/kernel/env.c | 2 + arch/loongarch/kernel/numa.c | 17 +++++++- arch/loongarch/kernel/setup.c | 62 +++++++++++++++++++++++++++++- arch/loongarch/kernel/smp.c | 34 ++++++++++++++++ arch/loongarch/pci/acpi.c | 7 +++- 10 files changed, 145 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0daf6263655b3..48db4b27b9afa 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -111,6 +111,8 @@ config LOONGARCH select MODULES_USE_ELF_RELA if MODULES select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK + select OF + select OF_EARLY_FLATTREE select PCI select PCI_DOMAINS_GENERIC select PCI_ECAM if ACPI diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 174567b00ddb9..81e5d33718684 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -9,6 +9,7 @@ void __init efi_init(void); void __init efi_runtime_init(void); +void __init *efi_fdt_pointer(void); void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #define ARCH_EFI_IRQ_FLAGS_MASK 0x00000004 /* Bit 2: CSR.CRMD.IE */ diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index ca373f8e3c4db..72ead58039f3e 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -13,6 +13,7 @@ extern unsigned long eentry; extern unsigned long tlbrentry; +extern char init_command_line[COMMAND_LINE_SIZE]; extern void tlb_init(int cpu); extern void cpu_cache_init(void); extern void cache_error_setup(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 8319cc4090090..5a63f85f57984 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ 
-12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -145,14 +146,14 @@ void __init acpi_boot_table_init(void) * If acpi_disabled, bail out */ if (acpi_disabled) - return; + goto fdt_earlycon; /* * Initialize the ACPI boot-time table parser. */ if (acpi_table_init()) { disable_acpi(); - return; + goto fdt_earlycon; } loongson_sysconf.boot_cpu_id = read_csr_cpuid(); @@ -164,6 +165,12 @@ void __init acpi_boot_table_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); + + return; + +fdt_earlycon: + if (earlycon_acpi_spcr_enable) + early_init_dt_scan_chosen_stdout(); } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index a313299711339..ea485b0e1e7f5 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -28,16 +28,29 @@ static unsigned long efi_nr_tables; static unsigned long efi_config_table; static unsigned long __initdata boot_memmap = EFI_INVALID_TABLE_ADDR; +static unsigned long __initdata fdt_pointer = EFI_INVALID_TABLE_ADDR; static efi_system_table_t *efi_systab; static efi_config_table_type_t arch_tables[] __initdata = { {LINUX_EFI_BOOT_MEMMAP_GUID, &boot_memmap, "MEMMAP" }, + {DEVICE_TREE_GUID, &fdt_pointer, "FDTPTR" }, {}, }; +void __init *efi_fdt_pointer(void) +{ + if (!efi_systab) + return NULL; + + if (fdt_pointer == EFI_INVALID_TABLE_ADDR) + return NULL; + + return early_memremap_ro(fdt_pointer, SZ_64K); +} + void __init efi_runtime_init(void) { - if (!efi_enabled(EFI_BOOT)) + if (!efi_enabled(EFI_BOOT) || !efi_systab->runtime) return; if (efi_runtime_disabled()) { diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 6d56a463b091c..6b3bfb0092e60 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -11,6 +11,7 @@ #include #include #include +#include u64 efi_system_table; struct loongson_system_configuration loongson_sysconf; @@ -27,6 +28,7 @@ void __init init_environ(void) clear_bit(EFI_BOOT, &efi.flags); strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); + strscpy(init_command_line, cmdline, COMMAND_LINE_SIZE); early_memunmap(cmdline, COMMAND_LINE_SIZE); efi_system_table = fw_arg2; diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index a13f92593cfda..3019ca14c760b 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -388,6 +388,21 @@ static void __init numa_default_distance(void) } } +/* + * fake_numa_init() - For Non-ACPI systems + * Return: 0 on success, -errno on failure. + */ +static int __init fake_numa_init(void) +{ + phys_addr_t start = memblock_start_of_DRAM(); + phys_addr_t end = memblock_end_of_DRAM() - 1; + + node_set(0, numa_nodes_parsed); + pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end); + + return numa_add_memblk(0, start, end + 1); +} + int __init init_numa_memory(void) { int i; @@ -404,7 +419,7 @@ int __init init_numa_memory(void) memset(&numa_meminfo, 0, sizeof(numa_meminfo)); /* Parse SRAT and SLIT if provided by firmware. */ - ret = acpi_numa_init(); + ret = acpi_disabled ? 
fake_numa_init() : acpi_numa_init(); if (ret < 0) return ret; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 53831bcb11ca3..f9f5a130710ca 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include #include @@ -69,6 +72,7 @@ static const char dmi_empty_string[] = " "; * * These are initialized so they are in the .data section */ +char init_command_line[COMMAND_LINE_SIZE] __initdata; static int num_standard_resources; static struct resource *standard_resources; @@ -253,6 +257,58 @@ static void __init arch_parse_crashkernel(void) #endif } +static void __init fdt_setup(void) +{ +#ifdef CONFIG_OF_EARLY_FLATTREE + void *fdt_pointer; + + /* ACPI-based systems do not require parsing fdt */ + if (acpi_os_get_root_pointer()) + return; + + /* Look for a device tree configuration table entry */ + fdt_pointer = efi_fdt_pointer(); + if (!fdt_pointer || fdt_check_header(fdt_pointer)) + return; + + early_init_dt_scan(fdt_pointer); + early_init_fdt_reserve_self(); + + max_low_pfn = PFN_PHYS(memblock_end_of_DRAM()); +#endif +} + +static void __init bootcmdline_init(char **cmdline_p) +{ + /* + * If CONFIG_CMDLINE_FORCE is enabled then initializing the command line + * is trivial - we simply use the built-in command line unconditionally & + * unmodified. + */ + if (IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + goto out; + } + +#ifdef CONFIG_OF_FLATTREE + /* + * If CONFIG_CMDLINE_BOOTLOADER is enabled and we are in FDT-based system, + * the boot_command_line will be overwritten by early_init_dt_scan_chosen(). + * So we need to append init_command_line (the original copy of boot_command_line) + * to boot_command_line. 
+ */ + if (initial_boot_params) { + if (boot_command_line[0]) + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + + strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE); + } +#endif + +out: + *cmdline_p = boot_command_line; +} + void __init platform_init(void) { arch_reserve_vmcore(); @@ -265,6 +321,7 @@ void __init platform_init(void) acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); #endif + unflatten_and_copy_device_tree(); #ifdef CONFIG_NUMA init_numa_memory(); @@ -297,6 +354,8 @@ static void __init arch_mem_init(char **cmdline_p) check_kernel_sections_mem(); + early_init_fdt_scan_reserved_mem(); + /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate @@ -422,12 +481,13 @@ static void __init prefill_possible_map(void) void __init setup_arch(char **cmdline_p) { cpu_probe(); - *cmdline_p = boot_command_line; init_environ(); efi_init(); + fdt_setup(); memblock_init(); pagetable_init(); + bootcmdline_init(cmdline_p); parse_early_param(); reserve_initrd_mem(); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 14508d429ffa3..b78816cf74ae5 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -180,8 +180,42 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } +static void __init fdt_smp_setup(void) +{ +#ifdef CONFIG_OF + unsigned int cpu, cpuid; + struct device_node *node = NULL; + + for_each_of_cpu_node(node) { + if (!of_device_is_available(node)) + continue; + + cpuid = of_get_cpu_hwid(node, 0); + if (cpuid >= nr_cpu_ids) + continue; + + if (cpuid == loongson_sysconf.boot_cpu_id) { + cpu = 0; + numa_add_cpu(cpu); + } else { + cpu = cpumask_next_zero(-1, cpu_present_mask); + } + + num_processors++; + set_cpu_possible(cpu, true); + set_cpu_present(cpu, true); + __cpu_number_map[cpuid] = cpu; + __cpu_logical_map[cpu] = cpuid; + } + + loongson_sysconf.nr_cpus = num_processors; +#endif +} + void __init loongson_smp_setup(void) { + fdt_smp_setup(); + cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 8235ec92b41fe..365f7de771cbb 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -26,9 +26,12 @@ void pcibios_add_bus(struct pci_bus *bus) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - struct pci_config_window *cfg = bridge->bus->sysdata; - struct acpi_device *adev = to_acpi_device(cfg->parent); + struct acpi_device *adev = NULL; struct device *bus_dev = &bridge->bus->dev; + struct pci_config_window *cfg = bridge->bus->sysdata; + + if (!acpi_disabled) + adev = to_acpi_device(cfg->parent); ACPI_COMPANION_SET(&bridge->dev, adev); set_dev_node(bus_dev, pa_to_nid(cfg->res.start)); -- GitLab From 27cab431564edba9919d1a82c2d9636d622a2493 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Sat, 10 Dec 2022 22:40:05 +0800 Subject: [PATCH 581/694] LoongArch: Add processing ISA Node in DeviceTree Similar to commit 6d0068ad15e4f771b3 ("MIPS: Loongson64: Process ISA Node in DeviceTree"), we process ISA node in DeviceTree for FDT-based systems. Previously, we are hardcoding reserved ISA I/O Space in, now we are processing it I/O via DeviceTree directly. The ranges property of ISA node is used to determine the size and address of reserved I/O space. 
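Note that the add_legacy_isa_io() helper added below deliberately rejects any mapping that does not land at offset 0 of PCI_IOBASE, which is what keeps classic fixed ISA port numbers usable through the ordinary accessors once the node has been parsed. The fragment below is only a hedged illustration of that outcome; the RTC index/data ports 0x70/0x71 are picked as a familiar example and are not something this patch touches.

    #include <linux/io.h>
    #include <linux/types.h>

    /* Illustration only: with the "isa" ranges registered at PIO offset 0,
     * ISA port N resolves to PCI_IOBASE + N, so inb()/outb() work as-is. */
    static u8 example_read_rtc_register(u8 index)
    {
    	outb(index, 0x70);	/* RTC index port (example) */
    	return inb(0x71);	/* RTC data port (example) */
    }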
Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 75 +++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index f9f5a130710ca..fdabf2ac1927d 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -429,6 +429,81 @@ static void __init resource_init(void) #endif } +static int __init add_legacy_isa_io(struct fwnode_handle *fwnode, + resource_size_t hw_start, resource_size_t size) +{ + int ret = 0; + unsigned long vaddr; + struct logic_pio_hwaddr *range; + + range = kzalloc(sizeof(*range), GFP_ATOMIC); + if (!range) + return -ENOMEM; + + range->fwnode = fwnode; + range->size = size = round_up(size, PAGE_SIZE); + range->hw_start = hw_start; + range->flags = LOGIC_PIO_CPU_MMIO; + + ret = logic_pio_register_range(range); + if (ret) { + kfree(range); + return ret; + } + + /* Legacy ISA must placed at the start of PCI_IOBASE */ + if (range->io_start != 0) { + logic_pio_unregister_range(range); + kfree(range); + return -EINVAL; + } + + vaddr = (unsigned long)(PCI_IOBASE + range->io_start); + ioremap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL)); + + return 0; +} + +static __init int arch_reserve_pio_range(void) +{ + struct device_node *np; + + for_each_node_by_name(np, "isa") { + struct of_range range; + struct of_range_parser parser; + + pr_info("ISA Bridge: %pOF\n", np); + + if (of_range_parser_init(&parser, np)) { + pr_info("Failed to parse resources.\n"); + of_node_put(np); + break; + } + + for_each_of_range(&parser, &range) { + switch (range.flags & IORESOURCE_TYPE_BITS) { + case IORESOURCE_IO: + pr_info(" IO 0x%016llx..0x%016llx -> 0x%016llx\n", + range.cpu_addr, + range.cpu_addr + range.size - 1, + range.bus_addr); + if (add_legacy_isa_io(&np->fwnode, range.cpu_addr, range.size)) + pr_warn("Failed to reserve legacy IO in Logic PIO\n"); + break; + case IORESOURCE_MEM: + pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx\n", + range.cpu_addr, + range.cpu_addr + range.size - 1, + range.bus_addr); + break; + } + } + } + + return 0; +} +arch_initcall(arch_reserve_pio_range); + static int __init reserve_memblock_reserved_regions(void) { u64 i, j; -- GitLab From 366bb35a8e48198cefcd3484ac6b2374d1347873 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 582/694] LoongArch: Add suspend (ACPI S3) support Add suspend (Suspend To RAM, aka ACPI S3) support for LoongArch. 
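The core of the change added below is the small amount of per-CPU state that has to survive the firmware call: the suspend path saves the page-table roots, page-walk configuration, exception setup and the stable-counter offset, and the wakeup stub restores them after coming back through acpi_saved_sp. The annotated struct is an editor's summary for readers skimming the diff (s3_cpu_context is an invented name; the code itself uses struct saved_registers together with save_counter()/sync_counter()).

    #include <linux/types.h>

    /* Summary of what arch_common_suspend()/arch_common_resume() preserve;
     * the struct name is invented for illustration. */
    struct s3_cpu_context {
    	u64 pgdl, pgdh;		/* CSR.PGDL/PGDH: user and kernel page-table roots */
    	u32 pwctl0, pwctl1;	/* page-walk controller layout used for TLB refill */
    	u32 ecfg;		/* exception/interrupt configuration */
    	u32 euen;		/* FPU and other extension enable bits */
    	s64 counter_offset;	/* keeps the stable counter consistent across sleep */
    };

With the firmware sleep vector located via the \SADR method, suspend can then be exercised through the usual "echo mem > /sys/power/state" interface.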
Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 5 ++ arch/loongarch/Makefile | 3 + arch/loongarch/include/asm/acpi.h | 10 +++ arch/loongarch/include/asm/bootinfo.h | 1 + arch/loongarch/include/asm/loongson.h | 3 + arch/loongarch/include/asm/time.h | 1 + arch/loongarch/kernel/acpi.c | 6 ++ arch/loongarch/kernel/smp.c | 1 + arch/loongarch/kernel/time.c | 11 +++- arch/loongarch/power/Makefile | 3 + arch/loongarch/power/platform.c | 57 +++++++++++++++++ arch/loongarch/power/suspend.c | 73 ++++++++++++++++++++++ arch/loongarch/power/suspend_asm.S | 89 +++++++++++++++++++++++++++ 13 files changed, 260 insertions(+), 3 deletions(-) create mode 100644 arch/loongarch/power/Makefile create mode 100644 arch/loongarch/power/platform.c create mode 100644 arch/loongarch/power/suspend.c create mode 100644 arch/loongarch/power/suspend_asm.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 48db4b27b9afa..0c8b2b1a96267 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -56,6 +56,7 @@ config LOONGARCH select ARCH_WANTS_NO_INSTR select BUILDTIME_TABLE_SORT select COMMON_CLK + select CPU_PM select EFI select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE @@ -517,6 +518,10 @@ config ARCH_MMAP_RND_BITS_MAX menu "Power management options" +config ARCH_SUSPEND_POSSIBLE + def_bool y + +source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" endmenu diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 01b57b7263225..5232d8c0f9caf 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -104,6 +104,9 @@ endif libs-y += arch/loongarch/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +# suspend and hibernation support +drivers-$(CONFIG_PM) += arch/loongarch/power/ + ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 825c2519b9d1f..4198753aa1d0f 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -35,4 +35,14 @@ extern struct list_head acpi_wakeup_device_list; #define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT +extern int loongarch_acpi_suspend(void); +extern int (*acpi_suspend_lowlevel)(void); +extern void loongarch_suspend_enter(void); + +static inline unsigned long acpi_get_wakeup_address(void) +{ + extern void loongarch_wakeup_start(void); + return (unsigned long)loongarch_wakeup_start; +} + #endif /* _ASM_LOONGARCH_ACPI_H */ diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index ed0910e8b856b..0051b526ac6d3 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -32,6 +32,7 @@ struct loongson_system_configuration { int cores_per_node; int cores_per_package; unsigned long cores_io_master; + unsigned long suspend_addr; const char *cpuname; }; diff --git a/arch/loongarch/include/asm/loongson.h b/arch/loongarch/include/asm/loongson.h index 00db93edae1ba..12494cffffd17 100644 --- a/arch/loongarch/include/asm/loongson.h +++ b/arch/loongarch/include/asm/loongson.h @@ -136,4 +136,7 @@ typedef enum { #define ls7a_writel(val, addr) *(volatile unsigned int *)TO_UNCACHE(addr) = (val) #define ls7a_writeq(val, addr) *(volatile unsigned long *)TO_UNCACHE(addr) = (val) +void enable_gpe_wakeup(void); +void enable_pci_wakeup(void); + #endif /* __ASM_LOONGSON_H */ diff --git a/arch/loongarch/include/asm/time.h b/arch/loongarch/include/asm/time.h index 2eae219301d0a..037a2d1b8ff4c 100644 --- 
a/arch/loongarch/include/asm/time.h +++ b/arch/loongarch/include/asm/time.h @@ -12,6 +12,7 @@ extern u64 cpu_clock_freq; extern u64 const_clock_freq; +extern void save_counter(void); extern void sync_counter(void); static inline unsigned int calc_const_freq(void) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 5a63f85f57984..98f431157e4c1 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -140,6 +140,12 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } +#ifndef CONFIG_SUSPEND +int (*acpi_suspend_lowlevel)(void); +#else +int (*acpi_suspend_lowlevel)(void) = loongarch_acpi_suspend; +#endif + void __init acpi_boot_table_init(void) { /* diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index b78816cf74ae5..8c6e227cb29df 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 786735dcc8d67..a6576dea590c0 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -115,12 +115,17 @@ static unsigned long __init get_loops_per_jiffy(void) return lpj; } -static long init_timeval; +static long init_offset __nosavedata; + +void save_counter(void) +{ + init_offset = drdtime(); +} void sync_counter(void) { /* Ensure counter begin at 0 */ - csr_write64(-init_timeval, LOONGARCH_CSR_CNTC); + csr_write64(init_offset, LOONGARCH_CSR_CNTC); } static int get_timer_irq(void) @@ -219,7 +224,7 @@ void __init time_init(void) else const_clock_freq = calc_const_freq(); - init_timeval = drdtime() - csr_read64(LOONGARCH_CSR_CNTC); + init_offset = -(drdtime() - csr_read64(LOONGARCH_CSR_CNTC)); constant_clockevent_init(); constant_clocksource_init(); diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile new file mode 100644 index 0000000000000..6740117decaa2 --- /dev/null +++ b/arch/loongarch/power/Makefile @@ -0,0 +1,3 @@ +obj-y += platform.o + +obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o diff --git a/arch/loongarch/power/platform.c b/arch/loongarch/power/platform.c new file mode 100644 index 0000000000000..3ea8e07aa225f --- /dev/null +++ b/arch/loongarch/power/platform.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include + +#include +#include + +void enable_gpe_wakeup(void) +{ + if (acpi_disabled) + return; + + if (acpi_gbl_reduced_hardware) + return; + + acpi_enable_all_wakeup_gpes(); +} + +void enable_pci_wakeup(void) +{ + if (acpi_disabled) + return; + + if (acpi_gbl_reduced_hardware) + return; + + acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_STATUS, 1); + + if (acpi_gbl_FADT.flags & ACPI_FADT_PCI_EXPRESS_WAKE) + acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_DISABLE, 0); +} + +static int __init loongson3_acpi_suspend_init(void) +{ +#ifdef CONFIG_ACPI + acpi_status status; + uint64_t suspend_addr = 0; + + if (acpi_disabled || acpi_gbl_reduced_hardware) + return 0; + + acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1); + status = acpi_evaluate_integer(NULL, "\\SADR", NULL, &suspend_addr); + if (ACPI_FAILURE(status) || !suspend_addr) { + pr_err("ACPI S3 is not support!\n"); + return -1; + } + loongson_sysconf.suspend_addr = (u64)phys_to_virt(PHYSADDR(suspend_addr)); +#endif + return 0; +} + +device_initcall(loongson3_acpi_suspend_init); diff --git 
a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c new file mode 100644 index 0000000000000..5e19733e5e05f --- /dev/null +++ b/arch/loongarch/power/suspend.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * loongson-specific suspend support + * + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include +#include + +#include +#include +#include +#include +#include + +u64 loongarch_suspend_addr; + +struct saved_registers { + u32 ecfg; + u32 euen; + u64 pgd; + u64 kpgd; + u32 pwctl0; + u32 pwctl1; +}; +static struct saved_registers saved_regs; + +static void arch_common_suspend(void) +{ + save_counter(); + saved_regs.pgd = csr_read64(LOONGARCH_CSR_PGDL); + saved_regs.kpgd = csr_read64(LOONGARCH_CSR_PGDH); + saved_regs.pwctl0 = csr_read32(LOONGARCH_CSR_PWCTL0); + saved_regs.pwctl1 = csr_read32(LOONGARCH_CSR_PWCTL1); + saved_regs.ecfg = csr_read32(LOONGARCH_CSR_ECFG); + saved_regs.euen = csr_read32(LOONGARCH_CSR_EUEN); + + loongarch_suspend_addr = loongson_sysconf.suspend_addr; +} + +static void arch_common_resume(void) +{ + sync_counter(); + local_flush_tlb_all(); + csr_write64(per_cpu_offset(0), PERCPU_BASE_KS); + csr_write64(eentry, LOONGARCH_CSR_EENTRY); + csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); + csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + + csr_write64(saved_regs.pgd, LOONGARCH_CSR_PGDL); + csr_write64(saved_regs.kpgd, LOONGARCH_CSR_PGDH); + csr_write32(saved_regs.pwctl0, LOONGARCH_CSR_PWCTL0); + csr_write32(saved_regs.pwctl1, LOONGARCH_CSR_PWCTL1); + csr_write32(saved_regs.ecfg, LOONGARCH_CSR_ECFG); + csr_write32(saved_regs.euen, LOONGARCH_CSR_EUEN); +} + +int loongarch_acpi_suspend(void) +{ + enable_gpe_wakeup(); + enable_pci_wakeup(); + + arch_common_suspend(); + + /* processor specific suspend */ + loongarch_suspend_enter(); + + arch_common_resume(); + + return 0; +} diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S new file mode 100644 index 0000000000000..eb2675642f9f4 --- /dev/null +++ b/arch/loongarch/power/suspend_asm.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Sleep helper for Loongson-3 sleep mode. 
+ * + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include + +/* preparatory stuff */ +.macro SETUP_SLEEP + addi.d sp, sp, -PT_SIZE + st.d $r1, sp, PT_R1 + st.d $r2, sp, PT_R2 + st.d $r3, sp, PT_R3 + st.d $r4, sp, PT_R4 + st.d $r21, sp, PT_R21 + st.d $r22, sp, PT_R22 + st.d $r23, sp, PT_R23 + st.d $r24, sp, PT_R24 + st.d $r25, sp, PT_R25 + st.d $r26, sp, PT_R26 + st.d $r27, sp, PT_R27 + st.d $r28, sp, PT_R28 + st.d $r29, sp, PT_R29 + st.d $r30, sp, PT_R30 + st.d $r31, sp, PT_R31 + + la.pcrel t0, acpi_saved_sp + st.d sp, t0, 0 +.endm + +.macro SETUP_WAKEUP + ld.d $r1, sp, PT_R1 + ld.d $r2, sp, PT_R2 + ld.d $r3, sp, PT_R3 + ld.d $r4, sp, PT_R4 + ld.d $r21, sp, PT_R21 + ld.d $r22, sp, PT_R22 + ld.d $r23, sp, PT_R23 + ld.d $r24, sp, PT_R24 + ld.d $r25, sp, PT_R25 + ld.d $r26, sp, PT_R26 + ld.d $r27, sp, PT_R27 + ld.d $r28, sp, PT_R28 + ld.d $r29, sp, PT_R29 + ld.d $r30, sp, PT_R30 + ld.d $r31, sp, PT_R31 +.endm + + .text + .align 12 + +/* Sleep/wakeup code for Loongson-3 */ +SYM_FUNC_START(loongarch_suspend_enter) + SETUP_SLEEP + bl __flush_cache_all + + /* Pass RA and SP to BIOS */ + addi.d a1, sp, 0 + la.pcrel a0, loongarch_wakeup_start + la.pcrel t0, loongarch_suspend_addr + ld.d t0, t0, 0 + jirl a0, t0, 0 /* Call BIOS's STR sleep routine */ + + /* + * This is where we return upon wakeup. + * Reload all of the registers and return. + */ +SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL) + li.d t0, CSR_DMW0_INIT # UC, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT # CA, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN1 + + la.abs t0, 0f + jr t0 +0: + la.pcrel t0, acpi_saved_sp + ld.d sp, t0, 0 + SETUP_WAKEUP + addi.d sp, sp, PT_SIZE + jr ra +SYM_FUNC_END(loongarch_suspend_enter) -- GitLab From 7db54bfe44a662c8f2c10277bccfa02c2f4c719c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 583/694] LoongArch: Add hibernation (ACPI S4) support Add hibernation (Suspend to Disk, aka ACPI S4) support for LoongArch. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 ++ arch/loongarch/kernel/asm-offsets.c | 12 +++++ arch/loongarch/kernel/reset.c | 5 +++ arch/loongarch/kernel/setup.c | 5 +++ arch/loongarch/power/Makefile | 1 + arch/loongarch/power/hibernate.c | 62 ++++++++++++++++++++++++++ arch/loongarch/power/hibernate_asm.S | 66 ++++++++++++++++++++++++++++ 7 files changed, 154 insertions(+) create mode 100644 arch/loongarch/power/hibernate.c create mode 100644 arch/loongarch/power/hibernate_asm.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0c8b2b1a96267..576a649ac13c7 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -521,6 +521,9 @@ menu "Power management options" config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_HIBERNATION_POSSIBLE + def_bool y + source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index bdd88eda9513f..4ef494577813e 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -257,3 +257,15 @@ void output_smpboot_defines(void) BLANK(); } #endif + +#ifdef CONFIG_HIBERNATION +void output_pbe_defines(void) +{ + COMMENT(" Linux struct pbe offsets. 
"); + OFFSET(PBE_ADDRESS, pbe, address); + OFFSET(PBE_ORIG_ADDRESS, pbe, orig_address); + OFFSET(PBE_NEXT, pbe, next); + DEFINE(PBE_SIZE, sizeof(struct pbe)); + BLANK(); +} +#endif diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index 8c82021eb2f44..1ef8c63835351 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -15,6 +15,7 @@ #include #include #include +#include void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -41,6 +42,10 @@ void machine_power_off(void) #ifdef CONFIG_SMP preempt_disable(); smp_send_stop(); +#endif +#ifdef CONFIG_PM + if (!acpi_disabled) + enable_pci_wakeup(); #endif do_kernel_power_off(); #ifdef CONFIG_EFI diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index fdabf2ac1927d..4344502c0b317 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -370,6 +371,10 @@ static void __init arch_mem_init(char **cmdline_p) dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); + /* Reserve for hibernation. */ + register_nosave_region(PFN_DOWN(__pa_symbol(&__nosave_begin)), + PFN_UP(__pa_symbol(&__nosave_end))); + memblock_dump_all(); early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile index 6740117decaa2..58151d003e40e 100644 --- a/arch/loongarch/power/Makefile +++ b/arch/loongarch/power/Makefile @@ -1,3 +1,4 @@ obj-y += platform.o obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o +obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate_asm.o diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c new file mode 100644 index 0000000000000..1e0590542f987 --- /dev/null +++ b/arch/loongarch/power/hibernate.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +static u32 saved_crmd; +static u32 saved_prmd; +static u32 saved_euen; +static u32 saved_ecfg; +static u64 saved_pcpu_base; +struct pt_regs saved_regs; + +void save_processor_state(void) +{ + saved_crmd = csr_read32(LOONGARCH_CSR_CRMD); + saved_prmd = csr_read32(LOONGARCH_CSR_PRMD); + saved_euen = csr_read32(LOONGARCH_CSR_EUEN); + saved_ecfg = csr_read32(LOONGARCH_CSR_ECFG); + saved_pcpu_base = csr_read64(PERCPU_BASE_KS); + + if (is_fpu_owner()) + save_fp(current); +} + +void restore_processor_state(void) +{ + csr_write32(saved_crmd, LOONGARCH_CSR_CRMD); + csr_write32(saved_prmd, LOONGARCH_CSR_PRMD); + csr_write32(saved_euen, LOONGARCH_CSR_EUEN); + csr_write32(saved_ecfg, LOONGARCH_CSR_ECFG); + csr_write64(saved_pcpu_base, PERCPU_BASE_KS); + + if (is_fpu_owner()) + restore_fp(current); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end)); + + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +extern int swsusp_asm_suspend(void); + +int swsusp_arch_suspend(void) +{ + enable_pci_wakeup(); + return swsusp_asm_suspend(); +} + +extern int swsusp_asm_resume(void); + +int swsusp_arch_resume(void) +{ + /* Avoid TLB mismatch during and after kernel resume */ + local_flush_tlb_all(); + return swsusp_asm_resume(); +} diff --git a/arch/loongarch/power/hibernate_asm.S b/arch/loongarch/power/hibernate_asm.S new file mode 100644 index 0000000000000..3c747c08d65dc --- /dev/null +++ b/arch/loongarch/power/hibernate_asm.S @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: 
GPL-2.0 */ +/* + * Hibernation support specific for LoongArch + * + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +.text +SYM_FUNC_START(swsusp_asm_suspend) + la.pcrel t0, saved_regs + PTR_S ra, t0, PT_R1 + PTR_S tp, t0, PT_R2 + PTR_S sp, t0, PT_R3 + PTR_S u0, t0, PT_R21 + PTR_S fp, t0, PT_R22 + PTR_S s0, t0, PT_R23 + PTR_S s1, t0, PT_R24 + PTR_S s2, t0, PT_R25 + PTR_S s3, t0, PT_R26 + PTR_S s4, t0, PT_R27 + PTR_S s5, t0, PT_R28 + PTR_S s6, t0, PT_R29 + PTR_S s7, t0, PT_R30 + PTR_S s8, t0, PT_R31 + b swsusp_save +SYM_FUNC_END(swsusp_asm_suspend) + +SYM_FUNC_START(swsusp_asm_resume) + la.pcrel t0, restore_pblist + PTR_L t0, t0, 0 +0: + PTR_L t1, t0, PBE_ADDRESS /* source */ + PTR_L t2, t0, PBE_ORIG_ADDRESS /* destination */ + PTR_LI t3, _PAGE_SIZE + PTR_ADD t3, t3, t1 +1: + REG_L t8, t1, 0 + REG_S t8, t2, 0 + PTR_ADDI t1, t1, SZREG + PTR_ADDI t2, t2, SZREG + bne t1, t3, 1b + PTR_L t0, t0, PBE_NEXT + bnez t0, 0b + la.pcrel t0, saved_regs + PTR_L ra, t0, PT_R1 + PTR_L tp, t0, PT_R2 + PTR_L sp, t0, PT_R3 + PTR_L u0, t0, PT_R21 + PTR_L fp, t0, PT_R22 + PTR_L s0, t0, PT_R23 + PTR_L s1, t0, PT_R24 + PTR_L s2, t0, PT_R25 + PTR_L s3, t0, PT_R26 + PTR_L s4, t0, PT_R27 + PTR_L s5, t0, PT_R28 + PTR_L s6, t0, PT_R29 + PTR_L s7, t0, PT_R30 + PTR_L s8, t0, PT_R31 + PTR_LI a0, 0x0 + jirl zero, ra, 0 +SYM_FUNC_END(swsusp_asm_resume) -- GitLab From 09f33601bf940f955c10a6e75a1c1b7bcadee5e2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 584/694] LoongArch: Add basic STACKPROTECTOR support Add basic stack protector support similar to other architectures. A constant canary value is set at boot time, and with help of compiler's -fstack-protector we can detect stack corruption. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/stackprotector.h | 38 +++++++++++++++++++++ arch/loongarch/kernel/asm-offsets.c | 3 ++ arch/loongarch/kernel/process.c | 6 ++++ arch/loongarch/kernel/switch.S | 5 +++ 5 files changed, 53 insertions(+) create mode 100644 arch/loongarch/include/asm/stackprotector.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 576a649ac13c7..28d827c6abb3b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -103,6 +103,7 @@ config LOONGARCH select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_SETUP_PER_CPU_AREA if NUMA + select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP diff --git a/arch/loongarch/include/asm/stackprotector.h b/arch/loongarch/include/asm/stackprotector.h new file mode 100644 index 0000000000000..a1a965751a7b9 --- /dev/null +++ b/arch/loongarch/include/asm/stackprotector.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary and + * on LoongArch gcc expects it to be defined by a global variable called + * "__stack_chk_guard". + */ + +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H + +#include +#include + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. 
+ */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 4ef494577813e..4bdb203fc66e1 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -68,6 +68,9 @@ void output_task_defines(void) OFFSET(TASK_FLAGS, task_struct, flags); OFFSET(TASK_MM, task_struct, mm); OFFSET(TASK_PID, task_struct, pid); +#if defined(CONFIG_STACKPROTECTOR) + OFFSET(TASK_STACK_CANARY, task_struct, stack_canary); +#endif DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); BLANK(); } diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index ddb8ba4eb399d..790cc14c5f06a 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -47,6 +47,12 @@ #include #include +#ifdef CONFIG_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + /* * Idle related variables and functions */ diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index 202a163cb32f6..31dd8199b2453 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -23,6 +23,11 @@ SYM_FUNC_START(__switch_to) stptr.d ra, a0, THREAD_REG01 stptr.d a3, a0, THREAD_SCHED_RA stptr.d a4, a0, THREAD_SCHED_CFA +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) + la t7, __stack_chk_guard + LONG_L t8, a1, TASK_STACK_CANARY + LONG_S t8, t7, 0 +#endif move tp, a2 cpu_restore_nonscratch a1 -- GitLab From 9151dde40356880bb445f719f5ebbb1319054d5f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 585/694] LoongArch: module: Use got/plt section indices for relocations Instead of saving a pointer to the .got, .plt and .plt_idx sections to apply {got,plt}-based relocations, save and use their section indices instead. The mod->arch.{core,init}.{got,plt} pointers were problematic for live- patch because they pointed within temporary section headers (provided by the module loader via info->sechdrs) that would be freed after module load. Since livepatch modules may need to apply relocations post-module- load (for example, to patch a module that is loaded later), using section indices to offset into the section headers (instead of accessing them through a saved pointer) allows livepatch modules on LoongArch to pass in their own copy of the section headers to apply_relocate_add() to apply delayed relocations. The method used is same as commit c8ebf64eab743 ("arm64/module: use plt section indices for relocations"). 
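For illustration only (not part of this patch): a minimal userspace C analogue of the change, with made-up structure and field names. It shows why an index into a caller-supplied section-header table keeps working when a different copy of the headers is passed in later (as livepatch does), while a pointer saved into the loader's temporary copy would dangle once that copy is freed.

  #include <stdio.h>

  struct fake_shdr { unsigned long sh_addr; };

  struct mod_state {
          int got_shndx;              /* index: resolve against whichever copy the caller provides */
          struct fake_shdr *got_shdr; /* pointer: only valid for the original, temporary copy */
  };

  static unsigned long resolve_by_index(struct mod_state *m, struct fake_shdr *sechdrs)
  {
          return sechdrs[m->got_shndx].sh_addr;
  }

  int main(void)
  {
          struct fake_shdr load_time[3]      = { {0x1000}, {0x2000}, {0x3000} };
          struct fake_shdr livepatch_copy[3] = { {0x1000}, {0x2000}, {0x3000} };
          struct mod_state m = { .got_shndx = 2, .got_shdr = &load_time[2] };

          /* After module load the load_time headers are freed; the index is still usable. */
          printf("got at %#lx\n", resolve_by_index(&m, livepatch_copy));
          return 0;
  }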
Signed-off-by: Hongchen Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/module.h | 22 +++++----- arch/loongarch/kernel/module-sections.c | 54 +++++++++++++------------ arch/loongarch/kernel/module.c | 39 ++++++++++++------ 3 files changed, 68 insertions(+), 47 deletions(-) diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index b29b19a46f427..60dc62a1146ee 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -11,7 +11,7 @@ #define RELA_STACK_DEPTH 16 struct mod_section { - Elf_Shdr *shdr; + int shndx; int num_entries; int max_entries; }; @@ -37,8 +37,8 @@ struct plt_idx_entry { Elf_Addr symbol_addr; }; -Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val); -Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val); +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val); +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val); static inline struct got_entry emit_got_entry(Elf_Addr val) { @@ -62,10 +62,10 @@ static inline struct plt_idx_entry emit_plt_idx_entry(unsigned long val) return (struct plt_idx_entry) { val }; } -static inline int get_plt_idx(unsigned long val, const struct mod_section *sec) +static inline int get_plt_idx(unsigned long val, Elf_Shdr *sechdrs, const struct mod_section *sec) { int i; - struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sec->shdr->sh_addr; + struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sechdrs[sec->shndx].sh_addr; for (i = 0; i < sec->num_entries; i++) { if (plt_idx[i].symbol_addr == val) @@ -76,11 +76,12 @@ static inline int get_plt_idx(unsigned long val, const struct mod_section *sec) } static inline struct plt_entry *get_plt_entry(unsigned long val, - const struct mod_section *sec_plt, - const struct mod_section *sec_plt_idx) + Elf_Shdr *sechdrs, + const struct mod_section *sec_plt, + const struct mod_section *sec_plt_idx) { - int plt_idx = get_plt_idx(val, sec_plt_idx); - struct plt_entry *plt = (struct plt_entry *)sec_plt->shdr->sh_addr; + int plt_idx = get_plt_idx(val, sechdrs, sec_plt_idx); + struct plt_entry *plt = (struct plt_entry *)sechdrs[sec_plt->shndx].sh_addr; if (plt_idx < 0) return NULL; @@ -89,10 +90,11 @@ static inline struct plt_entry *get_plt_entry(unsigned long val, } static inline struct got_entry *get_got_entry(Elf_Addr val, + Elf_Shdr *sechdrs, const struct mod_section *sec) { - struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr; int i; + struct got_entry *got = (struct got_entry *)sechdrs[sec->shndx].sh_addr; for (i = 0; i < sec->num_entries; i++) if (got[i].symbol_addr == val) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index d296a70b758fd..13d9a427325aa 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -7,17 +7,17 @@ #include #include -Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { struct mod_section *got_sec = &mod->arch.got; int i = got_sec->num_entries; - struct got_entry *got = get_got_entry(val, got_sec); + struct got_entry *got = get_got_entry(val, sechdrs, got_sec); if (got) return (Elf_Addr)got; /* There is no GOT entry for val yet, create a new one. 
*/ - got = (struct got_entry *)got_sec->shdr->sh_addr; + got = (struct got_entry *)sechdrs[got_sec->shndx].sh_addr; got[i] = emit_got_entry(val); got_sec->num_entries++; @@ -33,12 +33,12 @@ Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) return (Elf_Addr)&got[i]; } -Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { int nr; struct mod_section *plt_sec = &mod->arch.plt; struct mod_section *plt_idx_sec = &mod->arch.plt_idx; - struct plt_entry *plt = get_plt_entry(val, plt_sec, plt_idx_sec); + struct plt_entry *plt = get_plt_entry(val, sechdrs, plt_sec, plt_idx_sec); struct plt_idx_entry *plt_idx; if (plt) @@ -47,9 +47,9 @@ Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) nr = plt_sec->num_entries; /* There is no duplicate entry, create a new one */ - plt = (struct plt_entry *)plt_sec->shdr->sh_addr; + plt = (struct plt_entry *)sechdrs[plt_sec->shndx].sh_addr; plt[nr] = emit_plt_entry(val); - plt_idx = (struct plt_idx_entry *)plt_idx_sec->shdr->sh_addr; + plt_idx = (struct plt_idx_entry *)sechdrs[plt_idx_sec->shndx].sh_addr; plt_idx[nr] = emit_plt_idx_entry(val); plt_sec->num_entries++; @@ -103,28 +103,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { unsigned int i, num_plts = 0, num_gots = 0; + Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec; /* * Find the empty .plt sections. */ for (i = 0; i < ehdr->e_shnum; i++) { if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) - mod->arch.got.shdr = sechdrs + i; + mod->arch.got.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) - mod->arch.plt.shdr = sechdrs + i; + mod->arch.plt.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) - mod->arch.plt_idx.shdr = sechdrs + i; + mod->arch.plt_idx.shndx = i; } - if (!mod->arch.got.shdr) { + if (!mod->arch.got.shndx) { pr_err("%s: module GOT section(s) missing\n", mod->name); return -ENOEXEC; } - if (!mod->arch.plt.shdr) { + if (!mod->arch.plt.shndx) { pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; } - if (!mod->arch.plt_idx.shdr) { + if (!mod->arch.plt_idx.shndx) { pr_err("%s: module PLT.IDX section(s) missing\n", mod->name); return -ENOEXEC; } @@ -145,24 +146,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, count_max_entries(relas, num_rela, &num_plts, &num_gots); } - mod->arch.got.shdr->sh_type = SHT_NOBITS; - mod->arch.got.shdr->sh_flags = SHF_ALLOC; - mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + got_sec = sechdrs + mod->arch.got.shndx; + got_sec->sh_type = SHT_NOBITS; + got_sec->sh_flags = SHF_ALLOC; + got_sec->sh_addralign = L1_CACHE_BYTES; + got_sec->sh_size = (num_gots + 1) * sizeof(struct got_entry); mod->arch.got.num_entries = 0; mod->arch.got.max_entries = num_gots; - mod->arch.plt.shdr->sh_type = SHT_NOBITS; - mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry); + plt_sec = sechdrs + mod->arch.plt.shndx; + plt_sec->sh_type = SHT_NOBITS; + plt_sec->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + plt_sec->sh_addralign = L1_CACHE_BYTES; + plt_sec->sh_size = (num_plts + 1) * sizeof(struct plt_entry); mod->arch.plt.num_entries = 0; mod->arch.plt.max_entries = num_plts; - mod->arch.plt_idx.shdr->sh_type = SHT_NOBITS; - 
mod->arch.plt_idx.shdr->sh_flags = SHF_ALLOC; - mod->arch.plt_idx.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt_idx.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry); + plt_idx_sec = sechdrs + mod->arch.plt_idx.shndx; + plt_idx_sec->sh_type = SHT_NOBITS; + plt_idx_sec->sh_flags = SHF_ALLOC; + plt_idx_sec->sh_addralign = L1_CACHE_BYTES; + plt_idx_sec->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry); mod->arch.plt_idx.num_entries = 0; mod->arch.plt_idx.max_entries = num_plts; diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 825fcf77f9e73..899dc677cec3e 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -99,16 +99,17 @@ static int apply_r_larch_sop_push_dup(struct module *mod, u32 *location, Elf_Add return 0; } -static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { ptrdiff_t offset = (void *)v - (void *)location; if (offset >= SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); if (offset < -SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); return apply_r_larch_sop_push_pcrel(mod, location, v, rela_stack, rela_stack_top, type); } @@ -272,17 +273,18 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v, } } -static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_b26(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { ptrdiff_t offset = (void *)v - (void *)location; union loongarch_instruction *insn = (union loongarch_instruction *)location; if (offset >= SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); if (offset < -SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); offset = (void *)v - (void *)location; @@ -339,10 +341,11 @@ static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v, return 0; } -static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_got_pc(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { - Elf_Addr got = module_emit_got_entry(mod, v); + Elf_Addr got = module_emit_got_entry(mod, sechdrs, v); if (!got) return -EINVAL; @@ -387,13 +390,10 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_SOP_PUSH_PCREL] = apply_r_larch_sop_push_pcrel, [R_LARCH_SOP_PUSH_ABSOLUTE] = apply_r_larch_sop_push_absolute, [R_LARCH_SOP_PUSH_DUP] = apply_r_larch_sop_push_dup, - [R_LARCH_SOP_PUSH_PLT_PCREL] = apply_r_larch_sop_push_plt_pcrel, [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop, [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field, [R_LARCH_ADD32 ... 
R_LARCH_SUB64] = apply_r_larch_add_sub, - [R_LARCH_B26] = apply_r_larch_b26, [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, - [R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, @@ -444,7 +444,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, sym->st_value, rel[i].r_addend, (u64)location); v = sym->st_value + rel[i].r_addend; - err = handler(mod, location, v, rela_stack, &rela_stack_top, type); + switch (type) { + case R_LARCH_B26: + err = apply_r_larch_b26(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + case R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12: + err = apply_r_larch_got_pc(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + case R_LARCH_SOP_PUSH_PLT_PCREL: + err = apply_r_larch_sop_push_plt_pcrel(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + default: + err = handler(mod, location, v, rela_stack, &rela_stack_top, type); + } if (err) return err; } -- GitLab From dbe3ba3018ec1fc53ea0d0adf0f687f5d438039d Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 586/694] LoongArch/ftrace: Add basic support This patch contains basic ftrace support for LoongArch. Specifically, function tracer (HAVE_FUNCTION_TRACER), function graph tracer (HAVE_ FUNCTION_GRAPH_TRACER) are implemented following the instructions in Documentation/trace/ftrace-design.txt. Use `-pg` makes stub like a child function `void _mcount(void *ra)`. Thus, it can be seen store RA and alloc stack before `call _mcount`. Find `alloc stack` at first, and then find `store RA`. Note that the functions in both inst.c and time.c should not be hooked with the compiler's -pg option: to prevent infinite self-referencing for the former, and to ignore early setup stuff for the latter. 
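The backward scan described above can be sketched in plain userspace C (the opcode names and immediates below are invented for the example; the real code inspects LoongArch reg2i12-format store/alloc instructions): starting from the _mcount call site, step backwards, remember the offset at which RA was spilled, and stop once the stack-allocation instruction that opened the prologue is found.

  #include <stdio.h>
  #include <stdbool.h>

  enum fake_op { OP_ALLOC_STACK, OP_STORE_RA, OP_OTHER };

  struct fake_insn { enum fake_op op; int imm; };

  /* Scan at most 'limit' instructions backwards from the call site. */
  static bool find_parent_ra_off(const struct fake_insn *call_site, int limit, int *ra_off)
  {
          const struct fake_insn *insn = call_site;

          while (limit--) {
                  insn--;
                  if (insn->op == OP_STORE_RA)
                          *ra_off = insn->imm;        /* where the parent RA was spilled */
                  if (insn->op == OP_ALLOC_STACK)
                          return true;                /* reached the top of the prologue */
          }
          return false;
  }

  int main(void)
  {
          /* prologue: allocate stack, spill RA at sp + 8, misc insn, then the _mcount call */
          struct fake_insn prologue[] = {
                  { OP_ALLOC_STACK, -16 }, { OP_STORE_RA, 8 }, { OP_OTHER, 0 },
          };
          int ra_off = 0;

          if (find_parent_ra_off(&prologue[3], 8, &ra_off))
                  printf("parent RA spilled at sp + %d\n", ra_off);
          return 0;
  }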
Co-developed-by: Jinyang He Signed-off-by: Jinyang He Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 + arch/loongarch/include/asm/ftrace.h | 21 +++++++ arch/loongarch/kernel/Makefile | 8 +++ arch/loongarch/kernel/ftrace.c | 73 ++++++++++++++++++++++ arch/loongarch/kernel/mcount.S | 96 +++++++++++++++++++++++++++++ 5 files changed, 200 insertions(+) create mode 100644 arch/loongarch/include/asm/ftrace.h create mode 100644 arch/loongarch/kernel/ftrace.c create mode 100644 arch/loongarch/kernel/mcount.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 28d827c6abb3b..72a41e08f9f46 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -90,6 +90,8 @@ config LOONGARCH select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_GENERIC_VDSO select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h new file mode 100644 index 0000000000000..c3f5cde404646 --- /dev/null +++ b/arch/loongarch/include/asm/ftrace.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_LOONGARCH_FTRACE_H +#define _ASM_LOONGARCH_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER + +#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +#define mcount _mcount +extern void _mcount(void); +extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsite_sp, unsigned long old); +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* _ASM_LOONGARCH_FTRACE_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 86744531b100a..3f71bce1c7ce0 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -15,6 +15,14 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o +ifdef CONFIG_FUNCTION_TRACER +obj-y += mcount.o ftrace.o +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) +endif + obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/loongarch/kernel/ftrace.c b/arch/loongarch/kernel/ftrace.c new file mode 100644 index 0000000000000..8c3ec1bc7aad3 --- /dev/null +++ b/arch/loongarch/kernel/ftrace.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +/* + * As `call _mcount` follows LoongArch psABI, ra-saved operation and + * stack operation can be found before this insn. 
+ */ + +static int ftrace_get_parent_ra_addr(unsigned long insn_addr, int *ra_off) +{ + int limit = 32; + union loongarch_instruction *insn; + + insn = (union loongarch_instruction *)insn_addr; + + do { + insn--; + limit--; + + if (is_ra_save_ins(insn)) + *ra_off = -((1 << 12) - insn->reg2i12_format.immediate); + + } while (!is_stack_alloc_ins(insn) && limit); + + if (!limit) + return -EINVAL; + + return 0; +} + +void prepare_ftrace_return(unsigned long self_addr, + unsigned long callsite_sp, unsigned long old) +{ + int ra_off; + unsigned long return_hooker = (unsigned long)&return_to_handler; + + if (unlikely(ftrace_graph_is_dead())) + return; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + if (ftrace_get_parent_ra_addr(self_addr, &ra_off)) + goto out; + + if (!function_graph_enter(old, self_addr, 0, NULL)) + *(unsigned long *)(callsite_sp + ra_off) = return_hooker; + + return; + +out: + ftrace_graph_stop(); + WARN_ON(1); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S new file mode 100644 index 0000000000000..8cdc1563cd33f --- /dev/null +++ b/arch/loongarch/kernel/mcount.S @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch specific _mcount support + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + + .text + +#define MCOUNT_S0_OFFSET (0) +#define MCOUNT_RA_OFFSET (SZREG) +#define MCOUNT_STACK_SIZE (2 * SZREG) + + .macro MCOUNT_SAVE_REGS + PTR_ADDI sp, sp, -MCOUNT_STACK_SIZE + PTR_S s0, sp, MCOUNT_S0_OFFSET + PTR_S ra, sp, MCOUNT_RA_OFFSET + move s0, a0 + .endm + + .macro MCOUNT_RESTORE_REGS + move a0, s0 + PTR_L ra, sp, MCOUNT_RA_OFFSET + PTR_L s0, sp, MCOUNT_S0_OFFSET + PTR_ADDI sp, sp, MCOUNT_STACK_SIZE + .endm + +SYM_FUNC_START(_mcount) + la.pcrel t1, ftrace_stub + la.pcrel t2, ftrace_trace_function /* Prepare t2 for (1) */ + PTR_L t2, t2, 0 + beq t1, t2, fgraph_trace + + MCOUNT_SAVE_REGS + + move a0, ra /* arg0: self return address */ + move a1, s0 /* arg1: parent's return address */ + jirl ra, t2, 0 /* (1) call *ftrace_trace_function */ + + MCOUNT_RESTORE_REGS + +fgraph_trace: +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + la.pcrel t1, ftrace_stub + la.pcrel t3, ftrace_graph_return + PTR_L t3, t3, 0 + bne t1, t3, ftrace_graph_caller + la.pcrel t1, ftrace_graph_entry_stub + la.pcrel t3, ftrace_graph_entry + PTR_L t3, t3, 0 + bne t1, t3, ftrace_graph_caller +#endif + +SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) + jr ra +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_INNER_LABEL(ftrace_graph_func, SYM_L_GLOBAL) + bl ftrace_stub +#endif +SYM_FUNC_END(_mcount) +EXPORT_SYMBOL(_mcount) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_FUNC_START(ftrace_graph_caller) + MCOUNT_SAVE_REGS + + PTR_ADDI a0, ra, -4 /* arg0: Callsite self return addr */ + PTR_ADDI a1, sp, MCOUNT_STACK_SIZE /* arg1: Callsite sp */ + move a2, s0 /* arg2: Callsite parent ra */ + bl prepare_ftrace_return + + MCOUNT_RESTORE_REGS + jr ra +SYM_FUNC_END(ftrace_graph_caller) + +SYM_FUNC_START(return_to_handler) + PTR_ADDI sp, sp, -2 * SZREG + PTR_S a0, sp, 0 + PTR_S a1, sp, SZREG + + bl ftrace_return_to_handler + + /* Restore the real parent address: a0 -> ra */ + move ra, a0 + + PTR_L a0, sp, 0 + PTR_L a1, sp, SZREG + PTR_ADDI sp, sp, 2 * SZREG + jr ra +SYM_FUNC_END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- GitLab From a0a458fbd6f2317832e2d74acdbfa2451c3f4b8f Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 
Subject: [PATCH 587/694] LoongArch/ftrace: Add recordmcount support Recordmcount utility under scripts is run, after compiling each object, to find out all the locations of calling _mcount() and put them into specific seciton named __mcount_loc. Then the linker collects all such information into a table in the kernel image (between __start_mcount_loc and __stop_mcount_loc) for later use by ftrace. This patch adds LoongArch specific definitions to identify such locations. And on LoongArch, only the C version is used to build the kernel now that CONFIG_HAVE_C_RECORDMCOUNT is on. Acked-by: Steven Rostedt (Google) Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 ++ scripts/recordmcount.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 72a41e08f9f46..8b81cf9a65031 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -85,11 +85,13 @@ config LOONGARCH select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER + select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GENERIC_VDSO diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index cce12e1971d85..e30216525325b 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -38,6 +38,14 @@ #define R_AARCH64_ABS64 257 #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_MARK_LA 20 +#define R_LARCH_SOP_PUSH_PLT_PCREL 29 +#endif + #define R_ARM_PC24 1 #define R_ARM_THM_CALL 10 #define R_ARM_CALL 28 @@ -441,6 +449,28 @@ static int arm64_is_fake_mcount(Elf64_Rel const *rp) return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26; } +static int LARCH32_is_fake_mcount(Elf32_Rel const *rp) +{ + switch (ELF64_R_TYPE(w(rp->r_info))) { + case R_LARCH_MARK_LA: + case R_LARCH_SOP_PUSH_PLT_PCREL: + return 0; + } + + return 1; +} + +static int LARCH64_is_fake_mcount(Elf64_Rel const *rp) +{ + switch (ELF64_R_TYPE(w(rp->r_info))) { + case R_LARCH_MARK_LA: + case R_LARCH_SOP_PUSH_PLT_PCREL: + return 0; + } + + return 1; +} + /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. 
* http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40] @@ -558,6 +588,7 @@ static int do_file(char const *const fname) break; case EM_IA_64: reltype = R_IA64_IMM64; break; case EM_MIPS: /* reltype: e_class */ break; + case EM_LOONGARCH: /* reltype: e_class */ break; case EM_PPC: reltype = R_PPC_ADDR32; break; case EM_PPC64: reltype = R_PPC64_ADDR64; break; case EM_S390: /* reltype: e_class */ break; @@ -589,6 +620,10 @@ static int do_file(char const *const fname) reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; } + if (w2(ehdr->e_machine) == EM_LOONGARCH) { + reltype = R_LARCH_32; + is_fake_mcount32 = LARCH32_is_fake_mcount; + } if (do32(ehdr, fname, reltype) < 0) goto out; break; @@ -610,6 +645,10 @@ static int do_file(char const *const fname) Elf64_r_info = MIPS64_r_info; is_fake_mcount64 = MIPS64_is_fake_mcount; } + if (w2(ghdr->e_machine) == EM_LOONGARCH) { + reltype = R_LARCH_64; + is_fake_mcount64 = LARCH64_is_fake_mcount; + } if (do64(ghdr, fname, reltype) < 0) goto out; break; -- GitLab From 4733f09d880745953b88c3358b49ad495aecd8e9 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 588/694] LoongArch/ftrace: Add dynamic function tracer support The compiler has inserted 2 NOPs before the regular function prologue. T series registers are available and safe because of LoongArch's psABI. At runtime, we can replace nop with bl to enable ftrace call and replace bl with nop to disable ftrace call. The bl instruction requires us to save the original RA value, so it saves RA at t0 here. Details are: | Compiled | Disabled | Enabled | +------------+------------------------+------------------------+ | nop | move t0, ra | move t0, ra | | nop | nop | bl ftrace_caller | | func_body | func_body | func_body | The RA value will be recovered by ftrace_regs_entry, and restored into RA before returning to the regular function prologue. When a function is not being traced, the "move t0, ra" is not harmful. 1) ftrace_make_call, ftrace_make_nop (in kernel/ftrace.c) The two functions turn each recorded call site of filtered functions into a call to ftrace_caller or nops. 2) ftracce_update_ftrace_func (in kernel/ftrace.c) turns the nops at ftrace_call into a call to a generic entry for function tracers. 3) ftrace_caller (in kernel/mcount_dyn.S) The entry where each _mcount call sites calls to once they are filtered to be traced. 
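A rough userspace sketch of the validate-then-patch step behind the table above (the instruction words are placeholders; the real kernel path additionally serializes writers and flushes the icache, see larch_insn_patch_text):

  #include <stdio.h>
  #include <stdint.h>
  #include <errno.h>

  #define FAKE_NOP   0x03400000u   /* stands in for "nop" */
  #define FAKE_MOVE  0x00150000u   /* stands in for "move t0, ra" */
  #define FAKE_BL    0x54000000u   /* stands in for "bl ftrace_caller" */

  /* Only rewrite the slot if it still contains what we expect. */
  static int modify_code(uint32_t *slot, uint32_t old, uint32_t new, int validate)
  {
          if (validate && *slot != old)
                  return -EINVAL;
          *slot = new;
          return 0;
  }

  int main(void)
  {
          uint32_t site[2] = { FAKE_NOP, FAKE_NOP };     /* the two patchable entry slots */

          modify_code(&site[0], FAKE_NOP, FAKE_MOVE, 1); /* init:    nop -> move t0, ra */
          modify_code(&site[1], FAKE_NOP, FAKE_BL,   1); /* enable:  nop -> bl          */
          modify_code(&site[1], FAKE_BL,  FAKE_NOP,  1); /* disable: bl  -> nop         */
          printf("site: %#x %#x\n", (unsigned int)site[0], (unsigned int)site[1]);
          return 0;
  }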
Co-developed-by: Jinyang He Signed-off-by: Jinyang He Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/Makefile | 5 ++ arch/loongarch/include/asm/ftrace.h | 21 +++++ arch/loongarch/include/asm/inst.h | 11 +++ arch/loongarch/include/asm/unwind.h | 2 +- arch/loongarch/kernel/Makefile | 15 ++-- arch/loongarch/kernel/ftrace_dyn.c | 110 ++++++++++++++++++++++++ arch/loongarch/kernel/inst.c | 92 ++++++++++++++++++++ arch/loongarch/kernel/mcount_dyn.S | 85 ++++++++++++++++++ arch/loongarch/kernel/unwind_prologue.c | 35 +++++++- 10 files changed, 367 insertions(+), 10 deletions(-) create mode 100644 arch/loongarch/kernel/ftrace_dyn.c create mode 100644 arch/loongarch/kernel/mcount_dyn.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 8b81cf9a65031..6e9aaa747ef77 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -88,6 +88,7 @@ config LOONGARCH select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_DYNAMIC_FTRACE select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 5232d8c0f9caf..4402387d27551 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -25,6 +25,11 @@ endif 32bit-emul = elf32loongarch 64bit-emul = elf64loongarch +ifdef CONFIG_DYNAMIC_FTRACE +KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY +CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif + ifdef CONFIG_64BIT tool-archpref = $(64bit-tool-archpref) UTS_MACHINE := loongarch64 diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index c3f5cde404646..09ff0f84663d8 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -11,9 +11,30 @@ #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ #ifndef __ASSEMBLY__ + +#ifndef CONFIG_DYNAMIC_FTRACE + #define mcount _mcount extern void _mcount(void); extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsite_sp, unsigned long old); + +#else + +struct dyn_ftrace; +struct dyn_arch_ftrace { }; + +#define ftrace_init_nop ftrace_init_nop +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent); + +#endif /* CONFIG_DYNAMIC_FTRACE */ + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 67215af47b3d5..88e1673524e1d 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -349,6 +349,17 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip) is_imm12_negative(ip->reg2i12_format.immediate); } +int larch_insn_read(void *addr, u32 *insnp); +int larch_insn_write(void *addr, u32 insn); +int larch_insn_patch_text(void *addr, u32 insn); + +u32 larch_insn_gen_nop(void); +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest); +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest); + +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk); +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj); + u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, 
unsigned long pc, unsigned long dest); diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h index 6af4718bdf015..a51eec00efb85 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -20,7 +20,7 @@ struct unwind_state { char type; /* UNWINDER_XXX */ struct stack_info stack_info; struct task_struct *task; - bool first, error; + bool first, error, is_ftrace; unsigned long sp, pc, ra; }; diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 3f71bce1c7ce0..fcaa024a685ec 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -16,11 +16,16 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o ifdef CONFIG_FUNCTION_TRACER -obj-y += mcount.o ftrace.o -CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) + ifndef CONFIG_DYNAMIC_FTRACE + obj-y += mcount.o ftrace.o + CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) + else + obj-y += mcount_dyn.o ftrace_dyn.o + CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE) + endif + CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) endif obj-$(CONFIG_MODULES) += module.o module-sections.o diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c new file mode 100644 index 0000000000000..3b82bface840b --- /dev/null +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include +#include + +#include + +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) +{ + u32 replaced; + + if (validate) { + if (larch_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + + if (larch_insn_patch_text((void *)pc, new)) + return -EPERM; + + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + u32 new; + unsigned long pc; + + pc = (unsigned long)&ftrace_call; + new = larch_insn_gen_bl(pc, (unsigned long)func); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * The compiler has inserted 2 NOPs before the regular function prologue. + * T series registers are available and safe because of LoongArch's psABI. + * + * At runtime, we can replace nop with bl to enable ftrace call and replace bl + * with nop to disable ftrace call. The bl requires us to save the original RA + * value, so it saves RA at t0 here. + * + * Details are: + * + * | Compiled | Disabled | Enabled | + * +------------+------------------------+------------------------+ + * | nop | move t0, ra | move t0, ra | + * | nop | nop | bl ftrace_caller | + * | func_body | func_body | func_body | + * + * The RA value will be recovered by ftrace_regs_entry, and restored into RA + * before returning to the regular function prologue. When a function is not + * being traced, the "move t0, ra" is not harmful. 
+ */ + +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + u32 old, new; + unsigned long pc; + + pc = rec->ip; + old = larch_insn_gen_nop(); + new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA); + + return ftrace_modify_code(pc, old, new, true); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + u32 old, new; + unsigned long pc; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + + old = larch_insn_gen_nop(); + new = larch_insn_gen_bl(pc, addr); + + return ftrace_modify_code(pc, old, new, true); +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +{ + u32 old, new; + unsigned long pc; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + + new = larch_insn_gen_nop(); + old = larch_insn_gen_bl(pc, addr); + + return ftrace_modify_code(pc, old, new, true); +} + +void arch_ftrace_update_code(int command) +{ + command |= FTRACE_MAY_SLEEP; + ftrace_modify_all_code(command); +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index b1df0ec34bd1b..4fd22b4413d06 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -2,8 +2,100 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include +#include + +#include #include +static DEFINE_RAW_SPINLOCK(patch_lock); + +int larch_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE); + if (!ret) + *insnp = val; + + return ret; +} + +int larch_insn_write(void *addr, u32 insn) +{ + int ret; + unsigned long flags = 0; + + raw_spin_lock_irqsave(&patch_lock, flags); + ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE); + raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +int larch_insn_patch_text(void *addr, u32 insn) +{ + int ret; + u32 *tp = addr; + + if ((unsigned long)tp & 3) + return -EINVAL; + + ret = larch_insn_write(tp, insn); + if (!ret) + flush_icache_range((unsigned long)tp, + (unsigned long)tp + LOONGARCH_INSN_SIZE); + + return ret; +} + +u32 larch_insn_gen_nop(void) +{ + return INSN_NOP; +} + +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest) +{ + long offset = dest - pc; + unsigned int immediate_l, immediate_h; + union loongarch_instruction insn; + + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) { + pr_warn("The generated bl instruction is out of range.\n"); + return INSN_BREAK; + } + + offset >>= 2; + + immediate_l = offset & 0xffff; + offset >>= 16; + immediate_h = offset & 0x3ff; + + insn.reg0i26_format.opcode = bl_op; + insn.reg0i26_format.immediate_l = immediate_l; + insn.reg0i26_format.immediate_h = immediate_h; + + return insn.word; +} + +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + union loongarch_instruction insn; + + insn.reg3_format.opcode = or_op; + insn.reg3_format.rd = rd; + insn.reg3_format.rj = rj; + insn.reg3_format.rk = rk; + + return insn.word; +} + +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + return larch_insn_gen_or(rd, rj, 0); +} + u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm) { union loongarch_instruction insn; diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S new file mode 100644 index 0000000000000..45ba88d2aacc7 --- /dev/null +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Loongson Technology 
Corporation Limited + */ + +#include +#include +#include +#include + + .text +/* + * Due to -fpatchable-function-entry=2: the compiler inserted 2 NOPs before the + * regular C function prologue. When PC arrived here, the last 2 instructions + * are as follows: + * move t0, ra + * bl callsite (for modules, callsite is a tramplione) + * + * modules trampoline is as follows: + * lu12i.w t1, callsite[31:12] + * lu32i.d t1, callsite[51:32] + * lu52i.d t1, t1, callsite[63:52] + * jirl zero, t1, callsite[11:0] >> 2 + * + * See arch/loongarch/kernel/ftrace_dyn.c for details. Here, pay attention to + * that the T series regs are available and safe because each C functions + * follows the LoongArch's psABI as well. + */ + + .macro ftrace_regs_entry + PTR_ADDI sp, sp, -PT_SIZE + PTR_S t0, sp, PT_R1 /* Save parent ra at PT_R1(RA) */ + PTR_S a0, sp, PT_R4 + PTR_S a1, sp, PT_R5 + PTR_S a2, sp, PT_R6 + PTR_S a3, sp, PT_R7 + PTR_S a4, sp, PT_R8 + PTR_S a5, sp, PT_R9 + PTR_S a6, sp, PT_R10 + PTR_S a7, sp, PT_R11 + PTR_S fp, sp, PT_R22 + PTR_S ra, sp, PT_ERA /* Save trace function ra at PT_ERA */ + PTR_ADDI t8, sp, PT_SIZE + PTR_S t8, sp, PT_R3 + .endm + +SYM_FUNC_START(ftrace_stub) + jr ra +SYM_FUNC_END(ftrace_stub) + +SYM_CODE_START(ftrace_common) + PTR_ADDI a0, ra, -8 /* arg0: ip */ + move a1, t0 /* arg1: parent_ip */ + la.pcrel t1, function_trace_op + PTR_L a2, t1, 0 /* arg2: op */ + move a3, sp /* arg3: regs */ + +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) + bl ftrace_stub +/* + * As we didn't use S series regs in this assmembly code and all calls + * are C function which will save S series regs by themselves, there is + * no need to restore S series regs. The T series is available and safe + * at the callsite, so there is no need to restore the T series regs. + */ +ftrace_common_return: + PTR_L ra, sp, PT_R1 + PTR_L a0, sp, PT_R4 + PTR_L a1, sp, PT_R5 + PTR_L a2, sp, PT_R6 + PTR_L a3, sp, PT_R7 + PTR_L a4, sp, PT_R8 + PTR_L a5, sp, PT_R9 + PTR_L a6, sp, PT_R10 + PTR_L a7, sp, PT_R11 + PTR_L fp, sp, PT_R22 + PTR_L t0, sp, PT_ERA + PTR_ADDI sp, sp, PT_SIZE + jr t0 +SYM_CODE_END(ftrace_common) + +SYM_CODE_START(ftrace_caller) + ftrace_regs_entry + b ftrace_common +SYM_CODE_END(ftrace_caller) diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 4571c3c87cd4c..46fe344d7fba8 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -8,6 +8,16 @@ #include #include +static inline void unwind_state_fixup(struct unwind_state *state) +{ +#ifdef CONFIG_DYNAMIC_FTRACE + static unsigned long ftrace = (unsigned long)ftrace_call + 4; + + if (state->pc == ftrace) + state->is_ftrace = true; +#endif +} + unsigned long unwind_get_return_address(struct unwind_state *state) { @@ -41,15 +51,30 @@ static bool unwind_by_guess(struct unwind_state *state) static bool unwind_by_prologue(struct unwind_state *state) { - struct stack_info *info = &state->stack_info; - union loongarch_instruction *ip, *ip_end; long frame_ra = -1; unsigned long frame_size = 0; unsigned long size, offset, pc = state->pc; + struct pt_regs *regs; + struct stack_info *info = &state->stack_info; + union loongarch_instruction *ip, *ip_end; if (state->sp >= info->end || state->sp < info->begin) return false; + if (state->is_ftrace) { + /* + * As we meet ftrace_regs_entry, reset first flag like first doing + * tracing. Prologue analysis will stop soon because PC is at entry. 
+ */ + regs = (struct pt_regs *)state->sp; + state->first = true; + state->is_ftrace = false; + state->pc = regs->csr_era; + state->ra = regs->regs[1]; + state->sp = regs->regs[3]; + return true; + } + if (!kallsyms_lookup_size_offset(pc, &size, &offset)) return false; @@ -95,7 +120,7 @@ static bool unwind_by_prologue(struct unwind_state *state) state->pc = *(unsigned long *)(state->sp + frame_ra); state->sp = state->sp + frame_size; - return !!__kernel_text_address(state->pc); + goto out; first: state->first = false; @@ -104,7 +129,9 @@ static bool unwind_by_prologue(struct unwind_state *state) state->pc = state->ra; - return !!__kernel_text_address(state->ra); +out: + unwind_state_fixup(state); + return !!__kernel_text_address(state->pc); } void unwind_start(struct unwind_state *state, struct task_struct *task, -- GitLab From 5fcfad3d41cc70f39fb31e7ee314989cc4c5f02c Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 589/694] LoongArch/ftrace: Add dynamic function graph tracer support Once the function_graph tracer is enabled, a filtered function has the following call sequence: 1) ftracer_caller ==> on/off by ftrace_make_call/ftrace_make_nop 2) ftrace_graph_caller 3) ftrace_graph_call ==> on/off by ftrace_en/disable_ftrace_graph_caller 4) prepare_ftrace_return Considering the following DYNAMIC_FTRACE_WITH_REGS feature, it would be more extendable to have a ftrace_graph_caller function, instead of calling prepare_ftrace_return directly in ftrace_caller. Co-developed-by: Jinyang He Signed-off-by: Jinyang He Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/ftrace_dyn.c | 44 ++++++++++++++++++++++++++++++ arch/loongarch/kernel/inst.c | 24 ++++++++++++++++ arch/loongarch/kernel/mcount_dyn.S | 33 ++++++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 3b82bface840b..5a801c328e2a0 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -108,3 +108,47 @@ int __init ftrace_dyn_arch_init(void) { return 0; } + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) +{ + unsigned long old; + unsigned long return_hooker = (unsigned long)&return_to_handler; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + + if (!function_graph_enter(old, self_addr, 0, NULL)) + *parent = return_hooker; +} + +static int ftrace_modify_graph_caller(bool enable) +{ + u32 branch, nop; + unsigned long pc, func; + extern void ftrace_graph_call(void); + + pc = (unsigned long)&ftrace_graph_call; + func = (unsigned long)&ftrace_graph_caller; + + nop = larch_insn_gen_nop(); + branch = larch_insn_gen_b(pc, func); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 4fd22b4413d06..39671e87e31c9 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -55,6 +55,30 @@ u32 larch_insn_gen_nop(void) return INSN_NOP; } +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest) +{ + long offset = dest - pc; + unsigned int immediate_l, 
immediate_h; + union loongarch_instruction insn; + + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) { + pr_warn("The generated b instruction is out of range.\n"); + return INSN_BREAK; + } + + offset >>= 2; + + immediate_l = offset & 0xffff; + offset >>= 16; + immediate_h = offset & 0x3ff; + + insn.reg0i26_format.opcode = b_op; + insn.reg0i26_format.immediate_l = immediate_l; + insn.reg0i26_format.immediate_h = immediate_h; + + return insn.word; +} + u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest) { long offset = dest - pc; diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S index 45ba88d2aacc7..cce3daa2eb1e3 100644 --- a/arch/loongarch/kernel/mcount_dyn.S +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -57,6 +57,11 @@ SYM_CODE_START(ftrace_common) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) bl ftrace_stub +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) + nop /* b ftrace_graph_caller */ +#endif + /* * As we didn't use S series regs in this assmembly code and all calls * are C function which will save S series regs by themselves, there is @@ -83,3 +88,31 @@ SYM_CODE_START(ftrace_caller) ftrace_regs_entry b ftrace_common SYM_CODE_END(ftrace_caller) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_CODE_START(ftrace_graph_caller) + PTR_L a0, sp, PT_ERA + PTR_ADDI a0, a0, -8 /* arg0: self_addr */ + PTR_ADDI a1, sp, PT_R1 /* arg1: parent */ + bl prepare_ftrace_return + b ftrace_common_return +SYM_CODE_END(ftrace_graph_caller) + +SYM_CODE_START(return_to_handler) + /* Save return value regs */ + PTR_ADDI sp, sp, -2 * SZREG + PTR_S a0, sp, 0 + PTR_S a1, sp, SZREG + + move a0, zero + bl ftrace_return_to_handler + move ra, a0 + + /* Restore return value regs */ + PTR_L a0, sp, 0 + PTR_L a1, sp, SZREG + PTR_ADDI sp, sp, 2 * SZREG + + jr ra +SYM_CODE_END(return_to_handler) +#endif -- GitLab From 8778ba2c8a5df11859dc6f2b2205700388b63fd3 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:15 +0800 Subject: [PATCH 590/694] LoongArch/ftrace: Add HAVE_DYNAMIC_FTRACE_WITH_REGS support This patch implements CONFIG_DYNAMIC_FTRACE_WITH_REGS on LoongArch, which allows a traced function's arguments (and some other registers) to be captured into a struct pt_regs, allowing these to be inspected and modified. 
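For context, a minimal module sketch of how a caller might consume the captured registers through the generic ftrace_ops interface. The callback signature, FTRACE_OPS_FL_SAVE_REGS and ftrace_get_regs() are the generic ftrace API; the traced symbol name "do_something", the a0-in-regs[4] convention and the bare flag set are assumptions for the example (check whether additional flags are wanted on your kernel), so treat this as a sketch rather than a reference.

  #include <linux/module.h>
  #include <linux/kernel.h>
  #include <linux/string.h>
  #include <linux/ftrace.h>

  static void inspect_args(unsigned long ip, unsigned long parent_ip,
                           struct ftrace_ops *op, struct ftrace_regs *fregs)
  {
          struct pt_regs *regs = ftrace_get_regs(fregs);

          /* Full registers are only available when FL_SAVE_REGS was requested. */
          if (regs)
                  pr_info("traced %ps: a0 = %#lx\n", (void *)ip, regs->regs[4]);
  }

  static struct ftrace_ops inspect_ops = {
          .func  = inspect_args,
          .flags = FTRACE_OPS_FL_SAVE_REGS,
  };

  static int __init inspect_init(void)
  {
          /* "do_something" is a placeholder for the function being traced. */
          ftrace_set_filter(&inspect_ops, "do_something", strlen("do_something"), 1);
          return register_ftrace_function(&inspect_ops);
  }

  static void __exit inspect_exit(void)
  {
          unregister_ftrace_function(&inspect_ops);
  }

  module_init(inspect_init);
  module_exit(inspect_exit);
  MODULE_LICENSE("GPL");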
Co-developed-by: Jinyang He Signed-off-by: Jinyang He Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/ftrace.h | 2 ++ arch/loongarch/kernel/ftrace_dyn.c | 15 +++++++++++++ arch/loongarch/kernel/mcount_dyn.S | 35 +++++++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 6e9aaa747ef77..f2d1b2aef2d40 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -89,6 +89,7 @@ config LOONGARCH select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 09ff0f84663d8..dd4a0c8efd246 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -23,6 +23,8 @@ extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsit struct dyn_ftrace; struct dyn_arch_ftrace { }; +#define ARCH_SUPPORTS_FTRACE_OPS 1 + #define ftrace_init_nop ftrace_init_nop int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 5a801c328e2a0..d6f30918f94f9 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -28,6 +28,21 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) return 0; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) +{ + u32 old, new; + unsigned long pc; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + + new = larch_insn_gen_bl(pc, addr); + old = larch_insn_gen_bl(pc, old_addr); + + return ftrace_modify_code(pc, old, new, true); +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + int ftrace_update_ftrace_func(ftrace_func_t func) { u32 new; diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S index cce3daa2eb1e3..bbabf06244c26 100644 --- a/arch/loongarch/kernel/mcount_dyn.S +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -27,7 +27,7 @@ * follows the LoongArch's psABI as well. */ - .macro ftrace_regs_entry + .macro ftrace_regs_entry allregs=0 PTR_ADDI sp, sp, -PT_SIZE PTR_S t0, sp, PT_R1 /* Save parent ra at PT_R1(RA) */ PTR_S a0, sp, PT_R4 @@ -39,6 +39,30 @@ PTR_S a6, sp, PT_R10 PTR_S a7, sp, PT_R11 PTR_S fp, sp, PT_R22 + .if \allregs + PTR_S tp, sp, PT_R2 + PTR_S t0, sp, PT_R12 + PTR_S t1, sp, PT_R13 + PTR_S t2, sp, PT_R14 + PTR_S t3, sp, PT_R15 + PTR_S t4, sp, PT_R16 + PTR_S t5, sp, PT_R17 + PTR_S t6, sp, PT_R18 + PTR_S t7, sp, PT_R19 + PTR_S t8, sp, PT_R20 + PTR_S u0, sp, PT_R21 + PTR_S s0, sp, PT_R23 + PTR_S s1, sp, PT_R24 + PTR_S s2, sp, PT_R25 + PTR_S s3, sp, PT_R26 + PTR_S s4, sp, PT_R27 + PTR_S s5, sp, PT_R28 + PTR_S s6, sp, PT_R29 + PTR_S s7, sp, PT_R30 + PTR_S s8, sp, PT_R31 + /* Clear it for later use as a flag sometimes. 
*/ + PTR_S zero, sp, PT_R0 + .endif PTR_S ra, sp, PT_ERA /* Save trace function ra at PT_ERA */ PTR_ADDI t8, sp, PT_SIZE PTR_S t8, sp, PT_R3 @@ -85,10 +109,17 @@ ftrace_common_return: SYM_CODE_END(ftrace_common) SYM_CODE_START(ftrace_caller) - ftrace_regs_entry + ftrace_regs_entry allregs=0 b ftrace_common SYM_CODE_END(ftrace_caller) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +SYM_CODE_START(ftrace_regs_caller) + ftrace_regs_entry allregs=1 + b ftrace_common +SYM_CODE_END(ftrace_regs_caller) +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(ftrace_graph_caller) PTR_L a0, sp, PT_ERA -- GitLab From ac7127e1cc65aeb578998c992a05dbc80fa18f0f Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:16 +0800 Subject: [PATCH 591/694] LoongArch/ftrace: Add HAVE_DYNAMIC_FTRACE_WITH_ARGS support Allow for arguments to be passed in to ftrace_regs by default. If this is set, then arguments and stack can be found from the pt_regs. 1. HAVE_DYNAMIC_FTRACE_WITH_ARGS don't need special hook for graph tracer entry point, but instead we can use graph_ops::func function to install the return_hooker. 2. Livepatch requires this option in the future. Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/ftrace.h | 17 +++++++++++++++++ arch/loongarch/kernel/ftrace_dyn.c | 11 +++++++++++ 3 files changed, 29 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index f2d1b2aef2d40..a36bb0fe19775 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -89,6 +89,7 @@ config LOONGARCH select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT select HAVE_EXIT_THREAD diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index dd4a0c8efd246..ee7feface27a6 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -37,6 +37,23 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent); #endif /* CONFIG_DYNAMIC_FTRACE */ +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +struct ftrace_ops; + +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &fregs->regs; +} + +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#endif + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index d6f30918f94f9..439ba829b9fd9 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -139,6 +139,16 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) *parent = return_hooker; } +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + struct pt_regs *regs = &fregs->regs; + unsigned long *parent = (unsigned long *)®s->regs[1]; + + prepare_ftrace_return(ip, (unsigned long *)parent); +} +#else static int ftrace_modify_graph_caller(bool enable) { u32 branch, nop; @@ -166,4 +176,5 @@ int ftrace_disable_ftrace_graph_caller(void) { return ftrace_modify_graph_caller(false); } +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- 
GitLab From a51ac5246d2505b58229242959d2bc73d113ca50 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:21 +0800 Subject: [PATCH 592/694] LoongArch/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support ftrace_graph_ret_addr() can be called by stack unwinding code to convert a found stack return address ('ret') to its original value, in case the function graph tracer has modified it to be 'return_to_handler'. If the hasn't been modified, the unchanged value of 'ret' is returned. Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ftrace.h | 3 +++ arch/loongarch/include/asm/unwind.h | 1 + arch/loongarch/kernel/ftrace_dyn.c | 2 +- arch/loongarch/kernel/unwind_guess.c | 4 +++- arch/loongarch/kernel/unwind_prologue.c | 15 +++++++++++---- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index ee7feface27a6..8c7d137e4871b 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -6,6 +6,8 @@ #ifndef _ASM_LOONGARCH_FTRACE_H #define _ASM_LOONGARCH_FTRACE_H +#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1])) + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ @@ -24,6 +26,7 @@ struct dyn_ftrace; struct dyn_arch_ftrace { }; #define ARCH_SUPPORTS_FTRACE_OPS 1 +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ftrace_init_nop ftrace_init_nop int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h index a51eec00efb85..f2b52b9ea93d2 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -21,6 +21,7 @@ struct unwind_state { struct stack_info stack_info; struct task_struct *task; bool first, error, is_ftrace; + int graph_idx; unsigned long sp, pc, ra; }; diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 439ba829b9fd9..e23c3be29baab 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -135,7 +135,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) old = *parent; - if (!function_graph_enter(old, self_addr, 0, NULL)) + if (!function_graph_enter(old, self_addr, 0, parent)) *parent = return_hooker; } diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c index 5afa6064d73e4..e2d2e4f3001f4 100644 --- a/arch/loongarch/kernel/unwind_guess.c +++ b/arch/loongarch/kernel/unwind_guess.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include +#include #include @@ -53,7 +54,8 @@ bool unwind_next_frame(struct unwind_state *state) state->sp < info->end; state->sp += sizeof(unsigned long)) { addr = *(unsigned long *)(state->sp); - + state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); if (__kernel_text_address(addr)) return true; } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 46fe344d7fba8..0f8d1451ebb84 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2022 Loongson Technology Corporation Limited */ +#include #include #include @@ -42,6 +43,8 @@ static bool unwind_by_guess(struct unwind_state *state) state->sp < info->end; state->sp += sizeof(unsigned long)) { addr = 
*(unsigned long *)(state->sp); + state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); if (__kernel_text_address(addr)) return true; } @@ -174,8 +177,11 @@ bool unwind_next_frame(struct unwind_state *state) break; case UNWINDER_PROLOGUE: - if (unwind_by_prologue(state)) + if (unwind_by_prologue(state)) { + state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + state->pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); return true; + } if (info->type == STACK_TYPE_IRQ && info->end == state->sp) { @@ -185,10 +191,11 @@ bool unwind_next_frame(struct unwind_state *state) if (user_mode(regs) || !__kernel_text_address(pc)) return false; - state->pc = pc; - state->sp = regs->regs[3]; - state->ra = regs->regs[1]; state->first = true; + state->ra = regs->regs[1]; + state->sp = regs->regs[3]; + state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); get_stack_info(state->sp, state->task, info); return true; -- GitLab From 28ac0a9e04d7dfb42220dc9d221164d93f20fb3a Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 10 Dec 2022 22:40:21 +0800 Subject: [PATCH 593/694] LoongArch: modules/ftrace: Initialize PLT at load time This patch implements ftrace trampolines through plt entry. Tested by forcing ftrace_make_call() to use the module PLT, and then loading up a module after setting up ftrace with: | echo ":mod:" > set_ftrace_filter; | echo function > current_tracer; | modprobe Since FTRACE_ADDR/FTRACE_REGS_ADDR is only defined when CONFIG_DYNAMIC_ FTRACE is selected, we wrap their usage in module_init_ftrace_plt() with ifdeffery rather than using IS_ENABLED(). Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ftrace.h | 4 ++ arch/loongarch/include/asm/inst.h | 3 + arch/loongarch/include/asm/module.h | 5 +- arch/loongarch/include/asm/module.lds.h | 1 + arch/loongarch/kernel/ftrace_dyn.c | 93 +++++++++++++++++++++++++ arch/loongarch/kernel/inst.c | 11 +++ arch/loongarch/kernel/module-sections.c | 12 +++- arch/loongarch/kernel/module.c | 21 ++++++ 8 files changed, 148 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 8c7d137e4871b..90f9d3399b2aa 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -6,6 +6,10 @@ #ifndef _ASM_LOONGARCH_FTRACE_H #define _ASM_LOONGARCH_FTRACE_H +#define FTRACE_PLT_IDX 0 +#define FTRACE_REGS_PLT_IDX 1 +#define NR_FTRACE_PLTS 2 + #define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1])) #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 88e1673524e1d..c00e1512d4fa3 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -13,10 +13,12 @@ #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 #define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 +#define ADDR_IMMMASK_LU12IW 0x00000000FFFFF000 #define ADDR_IMMMASK_ADDU16ID 0x00000000FFFF0000 #define ADDR_IMMSHIFT_LU52ID 52 #define ADDR_IMMSHIFT_LU32ID 32 +#define ADDR_IMMSHIFT_LU12IW 12 #define ADDR_IMMSHIFT_ADDU16ID 16 #define ADDR_IMM(addr, INSN) ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN) @@ -360,6 +362,7 @@ u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest); u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk); u32 larch_insn_gen_move(enum loongarch_gpr rd, 
enum loongarch_gpr rj); +u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest); diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index 60dc62a1146ee..12a0f1e66916d 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -20,6 +20,9 @@ struct mod_arch_specific { struct mod_section got; struct mod_section plt; struct mod_section plt_idx; + + /* For CONFIG_DYNAMIC_FTRACE */ + struct plt_entry *ftrace_trampolines; }; struct got_entry { @@ -49,7 +52,7 @@ static inline struct plt_entry emit_plt_entry(unsigned long val) { u32 lu12iw, lu32id, lu52id, jirl; - lu12iw = (lu12iw_op << 25 | (((val >> 12) & 0xfffff) << 5) | LOONGARCH_GPR_T1); + lu12iw = larch_insn_gen_lu12iw(LOONGARCH_GPR_T1, ADDR_IMM(val, LU12IW)); lu32id = larch_insn_gen_lu32id(LOONGARCH_GPR_T1, ADDR_IMM(val, LU32ID)); lu52id = larch_insn_gen_lu52id(LOONGARCH_GPR_T1, LOONGARCH_GPR_T1, ADDR_IMM(val, LU52ID)); jirl = larch_insn_gen_jirl(0, LOONGARCH_GPR_T1, 0, (val & 0xfff)); diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h index a3d1bc0fcc72e..438f09d4ccf41 100644 --- a/arch/loongarch/include/asm/module.lds.h +++ b/arch/loongarch/include/asm/module.lds.h @@ -5,4 +5,5 @@ SECTIONS { .got : { BYTE(0) } .plt : { BYTE(0) } .plt.idx : { BYTE(0) } + .ftrace_trampoline : { BYTE(0) } } diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index e23c3be29baab..0f07591cab309 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -9,6 +9,7 @@ #include #include +#include static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) { @@ -29,18 +30,78 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) } #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + +#ifdef CONFIG_MODULES +static inline int __get_mod(struct module **mod, unsigned long addr) +{ + preempt_disable(); + *mod = __module_text_address(addr); + preempt_enable(); + + if (WARN_ON(!(*mod))) + return -EINVAL; + + return 0; +} + +static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) +{ + struct plt_entry *plt = mod->arch.ftrace_trampolines; + + if (addr == FTRACE_ADDR) + return &plt[FTRACE_PLT_IDX]; + if (addr == FTRACE_REGS_ADDR && + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return &plt[FTRACE_REGS_PLT_IDX]; + + return NULL; +} + +static unsigned long get_plt_addr(struct module *mod, unsigned long addr) +{ + struct plt_entry *plt; + + plt = get_ftrace_plt(mod, addr); + if (!plt) { + pr_err("ftrace: no module PLT for %ps\n", (void *)addr); + return -EINVAL; + } + + return (unsigned long)plt; +} +#endif + int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { u32 old, new; unsigned long pc; + long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; +#ifdef CONFIG_MODULES + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + addr = get_plt_addr(mod, addr); + + old_addr = get_plt_addr(mod, old_addr); + } +#endif + new = larch_insn_gen_bl(pc, addr); old = larch_insn_gen_bl(pc, old_addr); return ftrace_modify_code(pc, old, 
new, true); } + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ int ftrace_update_ftrace_func(ftrace_func_t func) @@ -91,9 +152,25 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { u32 old, new; unsigned long pc; + long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; +#ifdef CONFIG_MODULES + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + addr = get_plt_addr(mod, addr); + } +#endif + old = larch_insn_gen_nop(); new = larch_insn_gen_bl(pc, addr); @@ -104,9 +181,25 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long ad { u32 old, new; unsigned long pc; + long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; +#ifdef CONFIG_MODULES + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + addr = get_plt_addr(mod, addr); + } +#endif + new = larch_insn_gen_nop(); old = larch_insn_gen_bl(pc, addr); diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 39671e87e31c9..512579d79b221 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -120,6 +120,17 @@ u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj) return larch_insn_gen_or(rd, rj, 0); } +u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm) +{ + union loongarch_instruction insn; + + insn.reg1i20_format.opcode = lu12iw_op; + insn.reg1i20_format.rd = rd; + insn.reg1i20_format.immediate = imm; + + return insn.word; +} + u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm) { union loongarch_instruction insn; diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index 13d9a427325aa..d4dbcda1c4b0a 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -6,6 +6,7 @@ #include #include #include +#include Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { @@ -103,7 +104,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { unsigned int i, num_plts = 0, num_gots = 0; - Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec; + Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec, *tramp = NULL; /* * Find the empty .plt sections. 
@@ -115,6 +116,8 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.plt.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) mod->arch.plt_idx.shndx = i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".ftrace_trampoline")) + tramp = sechdrs + i; } if (!mod->arch.got.shndx) { @@ -170,5 +173,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.plt_idx.num_entries = 0; mod->arch.plt_idx.max_entries = num_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 899dc677cec3e..b8b86088b2dd2 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -15,9 +15,11 @@ #include #include #include +#include #include #include #include +#include static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { @@ -473,6 +475,23 @@ void *module_alloc(unsigned long size) GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } +static void module_init_ftrace_plt(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *mod) +{ +#ifdef CONFIG_DYNAMIC_FTRACE + struct plt_entry *ftrace_plts; + + ftrace_plts = (void *)sechdrs->sh_addr; + + ftrace_plts[FTRACE_PLT_IDX] = emit_plt_entry(FTRACE_ADDR); + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + ftrace_plts[FTRACE_REGS_PLT_IDX] = emit_plt_entry(FTRACE_REGS_ADDR); + + mod->arch.ftrace_trampolines = ftrace_plts; +#endif +} + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { @@ -482,6 +501,8 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { if (!strcmp(".altinstructions", secstrs + s->sh_name)) apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name)) + module_init_ftrace_plt(hdr, s, mod); } return 0; -- GitLab From 5535f4f70cfc15ef55b6ea7c7e17337b17337cb6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Dec 2022 22:40:21 +0800 Subject: [PATCH 594/694] LoongArch: Update Loongson-3 default config file 1, Enable suspend (ACPI S3) and hibernation (ACPI S4). 2, Enable some options for FDT-based systems (e.g., SERIAL_OF_PLATFORM). 3, Enable CONFIG_KALLSYMS_ALL and CONFIG_DEBUG_FS to convenient ftrace. 4, Regenerate the whole file to keep the order of options be the same as the latest source code. 
Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 56 ++++++++++++---------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3540e9c0a6310..eb84cae642e58 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -34,12 +34,13 @@ CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y -CONFIG_USERFAULTFD=y +CONFIG_KALLSYMS_ALL=y CONFIG_PERF_EVENTS=y -# CONFIG_COMPAT_BRK is not set CONFIG_LOONGARCH=y CONFIG_64BIT=y CONFIG_MACH_LOONGSON64=y +CONFIG_PAGE_SIZE_16KB=y +CONFIG_HZ_250=y CONFIG_DMI=y CONFIG_EFI=y CONFIG_SMP=y @@ -47,14 +48,14 @@ CONFIG_HOTPLUG_CPU=y CONFIG_NR_CPUS=64 CONFIG_NUMA=y CONFIG_KEXEC=y -CONFIG_PAGE_SIZE_16KB=y -CONFIG_HZ_250=y +CONFIG_SUSPEND=y +CONFIG_HIBERNATION=y CONFIG_ACPI=y CONFIG_ACPI_SPCR_TABLE=y -CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_TAD=y CONFIG_ACPI_DOCK=y CONFIG_ACPI_IPMI=m +CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_PCI_SLOT=y CONFIG_ACPI_HOTPLUG_MEMORY=y CONFIG_EFI_ZBOOT=y @@ -73,17 +74,19 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_ZPOOL=y CONFIG_ZSWAP=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y -CONFIG_ZPOOL=y CONFIG_ZBUD=y CONFIG_Z3FOLD=y CONFIG_ZSMALLOC=m +# CONFIG_COMPAT_BRK is not set +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -118,7 +121,6 @@ CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_NETDEV=m CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_NETBIOS_NS=m @@ -416,6 +418,7 @@ CONFIG_SCSI_VIRTIO=m CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_DWC=y CONFIG_PATA_ATIIXP=y CONFIG_PATA_PCMCIA=m CONFIG_MD=y @@ -469,13 +472,11 @@ CONFIG_VIRTIO_NET=m # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set CONFIG_BNX2=y -# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CAVIUM is not set CONFIG_CHELSIO_T1=m CONFIG_CHELSIO_T1_1G=y CONFIG_CHELSIO_T3=m CONFIG_CHELSIO_T4=m -# CONFIG_NET_VENDOR_CIRRUS is not set # CONFIG_NET_VENDOR_CISCO is not set # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set @@ -496,6 +497,7 @@ CONFIG_IXGBE=y # CONFIG_NET_VENDOR_NVIDIA is not set # CONFIG_NET_VENDOR_OKI is not set # CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RDC is not set CONFIG_8139CP=m @@ -505,9 +507,9 @@ CONFIG_R8169=y # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SILAN is not set # CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set CONFIG_STMMAC_ETH=y # CONFIG_NET_VENDOR_SUN is not set @@ -588,6 +590,7 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_NONSTANDARD=y CONFIG_PRINTER=m CONFIG_VIRTIO_CONSOLE=y @@ -602,6 +605,11 @@ 
CONFIG_I2C_GPIO=y CONFIG_SPI=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_LOONGSON=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_RESTART=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_POWER_RESET_SYSCON_POWEROFF=y +CONFIG_SYSCON_REBOOT_MODE=y CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM93=m CONFIG_SENSORS_W83795=m @@ -609,16 +617,16 @@ CONFIG_SENSORS_W83627HF=m CONFIG_RC_CORE=m CONFIG_LIRC=y CONFIG_RC_DECODERS=y +CONFIG_IR_IMON_DECODER=m +CONFIG_IR_JVC_DECODER=m +CONFIG_IR_MCE_KBD_DECODER=m CONFIG_IR_NEC_DECODER=m CONFIG_IR_RC5_DECODER=m CONFIG_IR_RC6_DECODER=m -CONFIG_IR_JVC_DECODER=m -CONFIG_IR_SONY_DECODER=m CONFIG_IR_SANYO_DECODER=m CONFIG_IR_SHARP_DECODER=m -CONFIG_IR_MCE_KBD_DECODER=m +CONFIG_IR_SONY_DECODER=m CONFIG_IR_XMP_DECODER=m -CONFIG_IR_IMON_DECODER=m CONFIG_MEDIA_SUPPORT=m CONFIG_MEDIA_USB_SUPPORT=y CONFIG_USB_VIDEO_CLASS=m @@ -638,6 +646,7 @@ CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y +CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y @@ -647,7 +656,6 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=m CONFIG_SND_SEQ_DUMMY=m -# CONFIG_SND_ISA is not set CONFIG_SND_BT87X=m CONFIG_SND_BT87X_OVERCLOCK=y CONFIG_SND_HDA_INTEL=y @@ -818,10 +826,6 @@ CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST5=m @@ -831,6 +835,9 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m @@ -844,6 +851,7 @@ CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_PRINTK_TIME=y CONFIG_STRIP_ASM_SYMS=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_PREEMPT is not set -- GitLab From f65a486821cfd363833079b2a7b0769250ee21c9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 22:04:07 +0900 Subject: [PATCH 595/694] kbuild: change module.order to list *.o instead of *.ko scripts/Makefile.build replaces the suffix .o with .ko, then scripts/Makefile.modpost calls the sed command to change .ko back to the original .o suffix. Instead of converting the suffixes back-and-forth, store the .o paths in modules.order, and replace it with .ko in 'make modules_install'. This avoids the unneeded sed command. 
Signed-off-by: Masahiro Yamada Reviewed-by: Luis Chamberlain --- Makefile | 2 +- scripts/Makefile.build | 2 +- scripts/Makefile.modfinal | 6 +++--- scripts/Makefile.modinst | 2 +- scripts/Makefile.modpost | 7 +++++-- scripts/clang-tools/gen_compile_commands.py | 8 ++++---- scripts/gen_autoksyms.sh | 2 +- scripts/mod/modpost.c | 11 ++++------- scripts/modules-check.sh | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 591485152a95c..f506879e74526 100644 --- a/Makefile +++ b/Makefile @@ -1564,7 +1564,7 @@ __modinst_pre: rm -f $(MODLIB)/build ; \ ln -s $(CURDIR) $(MODLIB)/build ; \ fi - @sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order + @sed 's:^\(.*\)\.o$$:kernel/\1.ko:' modules.order > $(MODLIB)/modules.order @cp -f modules.builtin $(MODLIB)/ @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/ diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 799df12b53f39..267eb7aac5b22 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -435,7 +435,7 @@ $(obj)/built-in.a: $(real-obj-y) FORCE # modules.order unless contained modules are updated. cmd_modules_order = { $(foreach m, $(real-prereqs), \ - $(if $(filter %/modules.order, $m), cat $m, echo $(patsubst %.o,%.ko,$m));) :; } \ + $(if $(filter %/modules.order, $m), cat $m, echo $m);) :; } \ > $@ $(obj)/modules.order: $(obj-m) FORCE diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 83f2797e530c5..a30d5b08eee94 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -15,7 +15,7 @@ include $(srctree)/scripts/Makefile.lib # find all modules listed in modules.order modules := $(call read-file, $(MODORDER)) -__modfinal: $(modules) +__modfinal: $(modules:%.o=%.ko) @: # modname and part-of-module are set to make c_flags define proper module flags @@ -57,13 +57,13 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \ printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) # Re-generate module BTFs if either module's .ko or vmlinux changed -$(modules): %.ko: %.o %.mod.o scripts/module.lds $(and $(CONFIG_DEBUG_INFO_BTF_MODULES),$(KBUILD_BUILTIN),vmlinux) FORCE +%.ko: %.o %.mod.o scripts/module.lds $(and $(CONFIG_DEBUG_INFO_BTF_MODULES),$(KBUILD_BUILTIN),vmlinux) FORCE +$(call if_changed_except,ld_ko_o,vmlinux) ifdef CONFIG_DEBUG_INFO_BTF_MODULES +$(if $(newer-prereqs),$(call cmd,btf_ko)) endif -targets += $(modules) $(modules:.ko=.mod.o) +targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o) # Add FORCE to the prequisites of a target to force it to be always rebuilt. 
# --------------------------------------------------------------------------- diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 65aac6be78ec3..836391e5d2097 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -26,7 +26,7 @@ suffix-$(CONFIG_MODULE_COMPRESS_GZIP) := .gz suffix-$(CONFIG_MODULE_COMPRESS_XZ) := .xz suffix-$(CONFIG_MODULE_COMPRESS_ZSTD) := .zst -modules := $(patsubst $(extmod_prefix)%, $(dst)/%$(suffix-y), $(modules)) +modules := $(patsubst $(extmod_prefix)%.o, $(dst)/%.ko$(suffix-y), $(modules)) __modinst: $(modules) @: diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 55a72f5eb76dd..f814a6acd200c 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -107,7 +107,10 @@ ifneq ($(KBUILD_MODPOST_WARN)$(missing-input),) modpost-args += -w endif -modorder-if-needed := $(if $(KBUILD_MODULES), $(MODORDER)) +ifdef KBUILD_MODULES +modorder-if-needed := $(MODORDER) +modpost-args += -T $(MODORDER) +endif MODPOST = scripts/mod/modpost @@ -119,7 +122,7 @@ quiet_cmd_modpost = MODPOST $@ echo >&2 "WARNING: $(missing-input) is missing."; \ echo >&2 " Modules may not have dependencies or modversions."; \ echo >&2 " You may get many unresolved symbol warnings.";) \ - sed 's/ko$$/o/' $(or $(modorder-if-needed), /dev/null) | $(MODPOST) $(modpost-args) -T - $(vmlinux.o-if-present) + $(MODPOST) $(modpost-args) $(vmlinux.o-if-present) targets += $(output-symdump) $(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(module.symvers-if-present) $(MODPOST) FORCE diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index d800b2c0af977..0227522959a45 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -138,10 +138,10 @@ def cmdfiles_for_modorder(modorder): """ with open(modorder) as f: for line in f: - ko = line.rstrip() - base, ext = os.path.splitext(ko) - if ext != '.ko': - sys.exit('{}: module path must end with .ko'.format(ko)) + obj = line.rstrip() + base, ext = os.path.splitext(obj) + if ext != '.o': + sys.exit('{}: module path must end with .o'.format(obj)) mod = base + '.mod' # Read from *.mod, to get a list of objects that compose the module. 
with open(mod) as m: diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh index 653fadbad302f..12bcfae940ee9 100755 --- a/scripts/gen_autoksyms.sh +++ b/scripts/gen_autoksyms.sh @@ -48,7 +48,7 @@ cat > "$output_file" << EOT EOT { - [ -n "${read_modorder}" ] && sed 's/ko$/usyms/' modules.order | xargs cat + [ -n "${read_modorder}" ] && sed 's/o$/usyms/' modules.order | xargs cat echo "$needed_symbols" [ -n "$ksym_wl" ] && cat "$ksym_wl" } | sed -e 's/ /\n/g' | sed -n -e '/^$/!p' | diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 56d856f2e5115..b48838a71bf6a 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1856,11 +1856,9 @@ static void read_symbols_from_files(const char *filename) FILE *in = stdin; char fname[PATH_MAX]; - if (strcmp(filename, "-") != 0) { - in = fopen(filename, "r"); - if (!in) - fatal("Can't open filenames file %s: %m", filename); - } + in = fopen(filename, "r"); + if (!in) + fatal("Can't open filenames file %s: %m", filename); while (fgets(fname, PATH_MAX, in) != NULL) { if (strends(fname, "\n")) @@ -1868,8 +1866,7 @@ static void read_symbols_from_files(const char *filename) read_symbols(fname); } - if (in != stdin) - fclose(in); + fclose(in); } #define SZ 500 diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh index e06327722263e..4c8da90de78ea 100755 --- a/scripts/modules-check.sh +++ b/scripts/modules-check.sh @@ -16,7 +16,7 @@ check_same_name_modules() for m in $(sed 's:.*/::' "$1" | sort | uniq -d) do echo "error: the following would cause module name conflict:" >&2 - sed -n "/\/$m/s:^: :p" "$1" >&2 + sed -n "/\/$m/s:^\(.*\)\.o\$: \1.ko:p" "$1" >&2 exit_code=1 done } -- GitLab From 3d57e1b7b1d42d4040f0d993b66ff06beda02c54 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 Dec 2022 22:04:08 +0900 Subject: [PATCH 596/694] kbuild: refactor the prerequisites of the modpost rule The prerequisites of modpost are cluttered. The variables *-if-present and *-if-needed are unreadable. It is cleaner to append them into modpost-deps. Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f814a6acd200c..5eb5e8280379a 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -38,6 +38,8 @@ __modpost: include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +MODPOST = scripts/mod/modpost + modpost-args = \ $(if $(CONFIG_MODVERSIONS),-m) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ @@ -46,6 +48,8 @@ modpost-args = \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ -o $@ +modpost-deps := $(MODPOST) + # 'make -i -k' ignores compile errors, and builds as many modules as possible. 
ifneq ($(findstring i,$(filter-out --%,$(MAKEFLAGS))),) modpost-args += -n @@ -78,12 +82,13 @@ targets += .vmlinux.objs .vmlinux.objs: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE $(call if_changed,vmlinux_objs) -vmlinux.o-if-present := $(wildcard vmlinux.o) -output-symdump := vmlinux.symvers - -ifdef KBUILD_MODULES -output-symdump := $(if $(vmlinux.o-if-present), Module.symvers, modules-only.symvers) -missing-input := $(filter-out $(vmlinux.o-if-present),vmlinux.o) +ifeq ($(wildcard vmlinux.o),) +missing-input := vmlinux.o +output-symdump := modules-only.symvers +else +modpost-args += vmlinux.o +modpost-deps += vmlinux.o +output-symdump := $(if $(KBUILD_MODULES), Module.symvers, vmlinux.symvers) endif else @@ -95,11 +100,16 @@ src := $(obj) # Include the module's Makefile to find KBUILD_EXTRA_SYMBOLS include $(kbuild-file) -module.symvers-if-present := $(wildcard Module.symvers) output-symdump := $(KBUILD_EXTMOD)/Module.symvers -missing-input := $(filter-out $(module.symvers-if-present), Module.symvers) -modpost-args += -e $(addprefix -i ,$(module.symvers-if-present) $(KBUILD_EXTRA_SYMBOLS)) +ifeq ($(wildcard Module.symvers),) +missing-input := Module.symvers +else +modpost-args += -i Module.symvers +modpost-deps += Module.symvers +endif + +modpost-args += -e $(addprefix -i , $(KBUILD_EXTRA_SYMBOLS)) endif # ($(KBUILD_EXTMOD),) @@ -108,12 +118,10 @@ modpost-args += -w endif ifdef KBUILD_MODULES -modorder-if-needed := $(MODORDER) modpost-args += -T $(MODORDER) +modpost-deps += $(MODORDER) endif -MODPOST = scripts/mod/modpost - # Read out modules.order to pass in modpost. # Otherwise, allmodconfig would fail with "Argument list too long". quiet_cmd_modpost = MODPOST $@ @@ -122,10 +130,10 @@ quiet_cmd_modpost = MODPOST $@ echo >&2 "WARNING: $(missing-input) is missing."; \ echo >&2 " Modules may not have dependencies or modversions."; \ echo >&2 " You may get many unresolved symbol warnings.";) \ - $(MODPOST) $(modpost-args) $(vmlinux.o-if-present) + $(MODPOST) $(modpost-args) targets += $(output-symdump) -$(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(module.symvers-if-present) $(MODPOST) FORCE +$(output-symdump): $(modpost-deps) FORCE $(call if_changed,modpost) __modpost: $(output-symdump) -- GitLab From 87d599fc3955e59b1ed30f350321a4be5353f945 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 13 Dec 2022 20:24:20 +0900 Subject: [PATCH 597/694] kbuild: ensure Make >= 3.82 is used Documentation/process/changes.rst notes the minimal GNU Make version, but it is not checked anywhere. We could check $(MAKE_VERSION), but another simple way is to check $(.FEATURES) since the feature list always grows. GNU Make 3.81 expands $(.FEATURES) to: target-specific order-only second-expansion else-if archives jobserver check-symlink GNU Make 3.82 expands $(.FEATURES) to: target-specific order-only second-expansion else-if shortest-stem undefine archives jobserver check-symlink To ensure Make >= 3.82, you can check either 'shortest-stem' or 'undefine'. This way is not always possible. For example, Make 4.0 through 4.2 have the same set of $(.FEATURES). At that point, we will need to come up with a different approach. 
Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Reviewed-by: Nicolas Schier --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index f506879e74526..5f015f206e12c 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,10 @@ NAME = Hurr durr I'ma ninja sloth # Comments in this file are targeted only to the developer, do not # expect to learn how to build the kernel reading this file. +ifeq ($(filter undefine,$(.FEATURES)),) +$(error GNU Make >= 3.82 is required. Your Make version is $(MAKE_VERSION)) +endif + $(if $(filter __%, $(MAKECMDGOALS)), \ $(error targets prefixed with '__' are only for internal use)) -- GitLab From 0d24f1b7cc65ee73ea8d04e0d10f77a7cb7a83f3 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 13 Dec 2022 11:35:28 -0700 Subject: [PATCH 598/694] padata: Mark padata_work_init() as __ref When building arm64 allmodconfig + ThinLTO with clang and a proposed modpost update to account for -ffuncton-sections, the following warning appears: WARNING: modpost: vmlinux.o: section mismatch in reference: padata_work_init (section: .text.padata_work_init) -> padata_mt_helper (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: padata_work_init (section: .text.padata_work_init) -> padata_mt_helper (section: .init.text) LLVM has optimized padata_work_init() to include the address of padata_mt_helper() directly because it inlined the other call to padata_work_init() with padata_parallel_worker(), meaning the remaining uses of padata_work_init() use padata_mt_helper() as the work_fn argument. This optimization causes modpost to complain since padata_work_init() is not __init, whereas padata_mt_helper() is. Since padata_work_init() is only called from __init code when padata_mt_helper() is passed as the work_fn argument, mark padata_work_init() as __ref, which makes it clear to modpost that this scenario is okay. Suggested-by: Daniel Jordan Signed-off-by: Nathan Chancellor Acked-by: Daniel Jordan Signed-off-by: Masahiro Yamada --- kernel/padata.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index e5819bb8bd1dc..d175cc0004538 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -83,8 +83,16 @@ static struct padata_work *padata_work_alloc(void) return pw; } -static void padata_work_init(struct padata_work *pw, work_func_t work_fn, - void *data, int flags) +/* + * This function is marked __ref because this function may be optimized in such + * a way that it directly refers to work_fn's address, which causes modpost to + * complain when work_fn is marked __init. This scenario was observed with clang + * LTO, where padata_work_init() was optimized to refer directly to + * padata_mt_helper() because the calls to padata_work_init() with other work_fn + * values were eliminated or inlined. + */ +static void __ref padata_work_init(struct padata_work *pw, work_func_t work_fn, + void *data, int flags) { if (flags & PADATA_WORK_ONSTACK) INIT_WORK_ONSTACK(&pw->pw_work, work_fn); -- GitLab From 19331e84c3873256537d446afec1f6c507f8c4ef Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 13 Dec 2022 11:35:29 -0700 Subject: [PATCH 599/694] modpost: Include '.text.*' in TEXT_SECTIONS Commit 6c730bfc894f ("modpost: handle -ffunction-sections") added ".text.*" to the OTHER_TEXT_SECTIONS macro to fix certain section mismatch warnings. 
Unfortunately, this makes it impossible for modpost to warn about section mismatches with LTO, which implies '-ffunction-sections', as all functions are put in their own '.text.' sections, which may still reference functions in sections they are not supposed to, such as __init. Fix this by moving ".text.*" into TEXT_SECTIONS, so that configurations with '-ffunction-sections' will see warnings about mismatched sections. Link: https://lore.kernel.org/Y39kI3MOtVI5BAnV@google.com/ Reported-by: Vincent Donnefort Reviewed-and-tested-by: Alexander Lobakin Reviewed-by: Sami Tolvanen Tested-by: Vincent Donnefort Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index b48838a71bf6a..640e1a244ba90 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -822,10 +822,10 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS #define DATA_SECTIONS ".data", ".data.rel" -#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ +#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" #define INIT_SECTIONS ".init.*" -- GitLab From 75333d48f92256a0dec91dbf07835e804fc411c0 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Mon, 12 Dec 2022 14:50:11 -0800 Subject: [PATCH 600/694] NFSD: fix use-after-free in __nfs42_ssc_open() Problem caused by source's vfsmount being unmounted but remains on the delayed unmount list. This happens when nfs42_ssc_open() return errors. Fixed by removing nfsd4_interssc_connect(), leave the vfsmount for the laundromat to unmount when idle time expires. We don't need to call nfs_do_sb_deactive when nfs42_ssc_open return errors since the file was not opened so nfs_server->active was not incremented. Same as in nfsd4_copy, if we fail to launch nfsd4_do_async_copy thread then there's no need to call nfs_do_sb_deactive Reported-by: Xingyuan Mo Signed-off-by: Dai Ngo Tested-by: Xingyuan Mo Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 73ed32ad23a20..bd880d55f565b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1461,13 +1461,6 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, return status; } -static void -nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) -{ - nfs_do_sb_deactive(ss_mnt->mnt_sb); - mntput(ss_mnt); -} - /* * Verify COPY destination stateid. 
* @@ -1570,11 +1563,6 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, { } -static void -nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) -{ -} - static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh, nfs4_stateid *stateid) @@ -1770,7 +1758,7 @@ static int nfsd4_do_async_copy(void *data) default: nfserr = nfserr_offload_denied; } - nfsd4_interssc_disconnect(copy->ss_mnt); + /* ss_mnt will be unmounted by the laundromat */ goto do_callback; } nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, @@ -1851,8 +1839,10 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); - if (nfsd4_ssc_is_inter(copy)) - nfsd4_interssc_disconnect(copy->ss_mnt); + /* + * source's vfsmount of inter-copy will be unmounted + * by the laundromat + */ goto out; } -- GitLab From 3f148f3318140035e87decc1214795ff0755757b Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 28 Oct 2022 00:31:04 +0300 Subject: [PATCH 601/694] x86/kasan: Map shadow for percpu pages on demand KASAN maps shadow for the entire CPU-entry-area: [CPU_ENTRY_AREA_BASE, CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE] This will explode once the per-cpu entry areas are randomized since it will increase CPU_ENTRY_AREA_MAP_SIZE to 512 GB and KASAN fails to allocate shadow for such big area. Fix this by allocating KASAN shadow only for really used cpu entry area addresses mapped by cea_map_percpu_pages() Thanks to the 0day folks for finding and reporting this to be an issue. [ dhansen: tweak changelog since this will get committed before peterz's actual cpu-entry-area randomization ] Signed-off-by: Andrey Ryabinin Signed-off-by: Dave Hansen Tested-by: Yujie Liu Cc: kernel test robot Link: https://lore.kernel.org/r/202210241508.2e203c3d-yujie.liu@intel.com --- arch/x86/include/asm/kasan.h | 3 +++ arch/x86/mm/cpu_entry_area.c | 8 +++++++- arch/x86/mm/kasan_init_64.c | 15 ++++++++++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 13e70da38beda..de75306b932ef 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -28,9 +28,12 @@ #ifdef CONFIG_KASAN void __init kasan_early_init(void); void __init kasan_init(void); +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid); #else static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } +static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size, + int nid) { } #endif #endif diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 6c2f1b76a0b61..d7081b1accca6 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -9,6 +9,7 @@ #include #include #include +#include static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); @@ -53,8 +54,13 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) static void __init cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) { + phys_addr_t pa = per_cpu_ptr_to_phys(ptr); + + kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE, + early_pfn_to_nid(PFN_DOWN(pa))); + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) - cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); + cea_set_pte(cea_vaddr, pa, prot); } static void __init percpu_setup_debug_store(unsigned int cpu) diff --git a/arch/x86/mm/kasan_init_64.c 
b/arch/x86/mm/kasan_init_64.c index e7b9b464a82f1..d1416926ad526 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -316,6 +316,18 @@ void __init kasan_early_init(void) kasan_map_early_shadow(init_top_pgt); } +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) +{ + unsigned long shadow_start, shadow_end; + + shadow_start = (unsigned long)kasan_mem_to_shadow(va); + shadow_start = round_down(shadow_start, PAGE_SIZE); + shadow_end = (unsigned long)kasan_mem_to_shadow(va + size); + shadow_end = round_up(shadow_end, PAGE_SIZE); + + kasan_populate_shadow(shadow_start, shadow_end, nid); +} + void __init kasan_init(void) { int i; @@ -393,9 +405,6 @@ void __init kasan_init(void) kasan_mem_to_shadow((void *)VMALLOC_END + 1), shadow_cpu_entry_begin); - kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, - (unsigned long)shadow_cpu_entry_end, 0); - kasan_populate_early_shadow(shadow_cpu_entry_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); -- GitLab From 97e3d26b5e5f371b3ee223d94dd123e6c442ba80 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 27 Oct 2022 14:54:41 -0700 Subject: [PATCH 602/694] x86/mm: Randomize per-cpu entry area Seth found that the CPU-entry-area; the piece of per-cpu data that is mapped into the userspace page-tables for kPTI is not subject to any randomization -- irrespective of kASLR settings. On x86_64 a whole P4D (512 GB) of virtual address space is reserved for this structure, which is plenty large enough to randomize things a little. As such, use a straight forward randomization scheme that avoids duplicates to spread the existing CPUs over the available space. [ bp: Fix le build. ] Reported-by: Seth Jenkins Reviewed-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov --- arch/x86/include/asm/cpu_entry_area.h | 4 --- arch/x86/include/asm/pgtable_areas.h | 8 ++++- arch/x86/kernel/hw_breakpoint.c | 2 +- arch/x86/mm/cpu_entry_area.c | 46 ++++++++++++++++++++++++--- 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 75efc4c6f0766..462fc34f13176 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -130,10 +130,6 @@ struct cpu_entry_area { }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) - -/* Total size includes the readonly IDT mapping page as well: */ -#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h index d34cce1b995cf..4f056fb88174b 100644 --- a/arch/x86/include/asm/pgtable_areas.h +++ b/arch/x86/include/asm/pgtable_areas.h @@ -11,6 +11,12 @@ #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) -#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) +#ifdef CONFIG_X86_32 +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \ + (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \ + CPU_ENTRY_AREA_BASE) +#else +#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE +#endif #endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 668a4a6533d92..bbb0f737aab19 100644 --- 
a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) /* CPU entry erea is always used for CPU entry */ if (within_area(addr, end, CPU_ENTRY_AREA_BASE, - CPU_ENTRY_AREA_TOTAL_SIZE)) + CPU_ENTRY_AREA_MAP_SIZE)) return true; /* diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index d7081b1accca6..dff9001e5e125 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -16,16 +16,53 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage) #ifdef CONFIG_X86_64 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); -#endif -#ifdef CONFIG_X86_32 +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset); + +static __always_inline unsigned int cea_offset(unsigned int cpu) +{ + return per_cpu(_cea_offset, cpu); +} + +static __init void init_cea_offsets(void) +{ + unsigned int max_cea; + unsigned int i, j; + + max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; + + /* O(sodding terrible) */ + for_each_possible_cpu(i) { + unsigned int cea; + +again: + cea = prandom_u32_max(max_cea); + + for_each_possible_cpu(j) { + if (cea_offset(j) == cea) + goto again; + + if (i == j) + break; + } + + per_cpu(_cea_offset, i) = cea; + } +} +#else /* !X86_64 */ DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); + +static __always_inline unsigned int cea_offset(unsigned int cpu) +{ + return cpu; +} +static inline void init_cea_offsets(void) { } #endif /* Is called from entry code, so must be noinstr */ noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu) { - unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE; BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); return (struct cpu_entry_area *) va; @@ -211,7 +248,6 @@ static __init void setup_cpu_entry_area_ptes(void) /* The +1 is for the readonly IDT: */ BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); - BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); start = CPU_ENTRY_AREA_BASE; @@ -227,6 +263,8 @@ void __init setup_cpu_entry_areas(void) { unsigned int cpu; + init_cea_offsets(); + setup_cpu_entry_area_ptes(); for_each_possible_cpu(cpu) -- GitLab From af80602799681c78f14fbe20b6185a56020dedee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:38:18 +0200 Subject: [PATCH 603/694] mm: Move mm_cachep initialization to mm_init() In order to allow using mm_alloc() much earlier, move initializing mm_cachep into mm_init(). 
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.751153381@infradead.org --- include/linux/sched/task.h | 1 + init/main.c | 1 + kernel/fork.c | 32 ++++++++++++++++++-------------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index d6c48163c6def..8431558641a42 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -65,6 +65,7 @@ extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); void __noreturn make_task_dead(int signr); +extern void mm_cache_init(void); extern void proc_caches_init(void); extern void fork_init(void); diff --git a/init/main.c b/init/main.c index aa21add5f7c54..f1d1a549e1ccb 100644 --- a/init/main.c +++ b/init/main.c @@ -860,6 +860,7 @@ static void __init mm_init(void) /* Should be run after espfix64 is set up. */ pti_init(); kmsan_init_runtime(); + mm_cache_init(); } #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET diff --git a/kernel/fork.c b/kernel/fork.c index 08969f5aa38d5..451ce8063f850 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3015,10 +3015,27 @@ static void sighand_ctor(void *data) init_waitqueue_head(&sighand->signalfd_wqh); } -void __init proc_caches_init(void) +void __init mm_cache_init(void) { unsigned int mm_size; + /* + * The mm_cpumask is located at the end of mm_struct, and is + * dynamically sized based on the maximum CPU number this system + * can have, taking hotplug into account (nr_cpu_ids). + */ + mm_size = sizeof(struct mm_struct) + cpumask_size(); + + mm_cachep = kmem_cache_create_usercopy("mm_struct", + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, + offsetof(struct mm_struct, saved_auxv), + sizeof_field(struct mm_struct, saved_auxv), + NULL); +} + +void __init proc_caches_init(void) +{ sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| @@ -3036,19 +3053,6 @@ void __init proc_caches_init(void) SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); - /* - * The mm_cpumask is located at the end of mm_struct, and is - * dynamically sized based on the maximum CPU number this system - * can have, taking hotplug into account (nr_cpu_ids). - */ - mm_size = sizeof(struct mm_struct) + cpumask_size(); - - mm_cachep = kmem_cache_create_usercopy("mm_struct", - mm_size, ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, - offsetof(struct mm_struct, saved_auxv), - sizeof_field(struct mm_struct, saved_auxv), - NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); nsproxy_cache_init(); -- GitLab From 3f4c8211d982099be693be9aa7d6fc4607dff290 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:38:21 +0200 Subject: [PATCH 604/694] x86/mm: Use mm_alloc() in poking_init() Instead of duplicating init_mm, allocate a fresh mm. The advantage is that mm_alloc() has much simpler dependencies. Additionally it makes more conceptual sense, init_mm has no (and must not have) user state to duplicate. 
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.816175235@infradead.org --- arch/x86/mm/init.c | 2 +- include/linux/sched/task.h | 1 - kernel/fork.c | 5 ----- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 9121bc1b9453a..d3987359d4414 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -801,7 +801,7 @@ void __init poking_init(void) spinlock_t *ptl; pte_t *ptep; - poking_mm = copy_init_mm(); + poking_mm = mm_alloc(); BUG_ON(!poking_mm); /* diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 8431558641a42..357e0068497c1 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -91,7 +91,6 @@ extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); struct task_struct *fork_idle(int); -struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); diff --git a/kernel/fork.c b/kernel/fork.c index 451ce8063f850..6142c588c18ae 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2592,11 +2592,6 @@ struct task_struct * __init fork_idle(int cpu) return task; } -struct mm_struct *copy_init_mm(void) -{ - return dup_mm(NULL, &init_mm); -} - /* * This is like kernel_clone(), but shaved down and tailored to just * creating io_uring workers. It returns a created task, or an error pointer. -- GitLab From 5b93a83649c7cba3a15eb7e8959b250841acb1b1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:38:25 +0200 Subject: [PATCH 605/694] x86/mm: Initialize text poking earlier Move poking_init() up a bunch; specifically move it right after mm_init() which is right before ftrace_init(). This will allow simplifying ftrace text poking which currently has a bunch of exceptions for early boot. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.881703081@infradead.org --- init/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index f1d1a549e1ccb..5372ea2437352 100644 --- a/init/main.c +++ b/init/main.c @@ -996,7 +996,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) sort_main_extable(); trap_init(); mm_init(); - + poking_init(); ftrace_init(); /* trace_printk can be enabled here */ @@ -1135,7 +1135,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) taskstats_init_early(); delayacct_init(); - poking_init(); check_bugs(); acpi_subsystem_init(); -- GitLab From eb7d389d5b2b3c453332abc41c3eea73290cc006 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:39:47 +0200 Subject: [PATCH 606/694] x86/ftrace: Remove SYSTEM_BOOTING exceptions Now that text_poke is available before ftrace, remove the SYSTEM_BOOTING exceptions. Specifically, this cures a W+X case during boot. 
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201057.945960823@infradead.org --- arch/x86/kernel/alternative.c | 10 ---------- arch/x86/kernel/ftrace.c | 3 +-- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5cadcea035e04..e240351e0bc12 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1681,11 +1681,6 @@ void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const voi { struct text_poke_loc *tp; - if (unlikely(system_state == SYSTEM_BOOTING)) { - text_poke_early(addr, opcode, len); - return; - } - text_poke_flush(addr); tp = &tp_vec[tp_vec_nr++]; @@ -1707,11 +1702,6 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void * { struct text_poke_loc tp; - if (unlikely(system_state == SYSTEM_BOOTING)) { - text_poke_early(addr, opcode, len); - return; - } - text_poke_loc_init(&tp, addr, opcode, len, emulate); text_poke_bp_batch(&tp, 1); } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bd165004776d9..43628b8480fa1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -415,8 +415,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) set_vm_flush_reset_perms(trampoline); - if (likely(system_state != SYSTEM_BOOTING)) - set_memory_ro((unsigned long)trampoline, npages); + set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: -- GitLab From 414ebf148cb5c5fa727ec51fdb69c4ab82dccf3b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:39:43 +0200 Subject: [PATCH 607/694] x86/mm: Do verify W^X at boot up Straight up revert of commit: a970174d7a10 ("x86/mm: Do not verify W^X at boot up") now that the root cause has been fixed. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201058.011279208@infradead.org --- arch/x86/mm/pat/set_memory.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 2e5a045731dec..97342c42dda8e 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -587,10 +587,6 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star { unsigned long end; - /* Kernel text is rw at boot up */ - if (system_state == SYSTEM_BOOTING) - return new; - /* * 32-bit has some unfixable W+X issues, like EFI code * and writeable data being in the same page. Disable -- GitLab From d48567c9a0d1e605639f8a8705a61bbb55fb4e84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 26 Oct 2022 12:13:03 +0200 Subject: [PATCH 608/694] mm: Introduce set_memory_rox() Because endlessly repeating: set_memory_ro() set_memory_x() is getting tedious. 
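As an aside, not part of the patch text: the combined helper is expected to be a thin wrapper that chains the two existing primitives, roughly along these lines, assuming the include/linux/set_memory.h addition takes the obvious form next to the existing set_memory_ro()/set_memory_x() declarations:

  /* Sketch of the assumed combined helper: apply RO first, then X,
   * propagating the first failure. */
  static inline int set_memory_rox(unsigned long addr, int numpages)
  {
          int ret = set_memory_ro(addr, numpages);

          if (ret)
                  return ret;

          return set_memory_x(addr, numpages);
  }

Call sites can then collapse back-to-back set_memory_ro()/set_memory_x() pairs into a single set_memory_rox() call, which is what the hunks below do.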
Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Y1jek64pXOsougmz@hirez.programming.kicks-ass.net --- arch/arm/mach-omap1/sram-init.c | 8 +++----- arch/arm/mach-omap2/sram.c | 8 +++----- arch/powerpc/kernel/kprobes.c | 9 ++++----- arch/x86/kernel/ftrace.c | 5 ++--- arch/x86/kernel/kprobes/core.c | 9 ++------- drivers/misc/sram-exec.c | 7 ++----- include/linux/filter.h | 3 +-- include/linux/set_memory.h | 8 ++++++++ kernel/bpf/bpf_struct_ops.c | 3 +-- kernel/bpf/core.c | 6 ++---- kernel/bpf/trampoline.c | 3 +-- net/bpf/bpf_dummy_struct_ops.c | 3 +-- 12 files changed, 30 insertions(+), 42 deletions(-) diff --git a/arch/arm/mach-omap1/sram-init.c b/arch/arm/mach-omap1/sram-init.c index 27c42e2a21cc1..dabf0c4defebf 100644 --- a/arch/arm/mach-omap1/sram-init.c +++ b/arch/arm/mach-omap1/sram-init.c @@ -10,11 +10,11 @@ #include #include #include +#include #include #include #include -#include #include @@ -74,8 +74,7 @@ void *omap_sram_push(void *funcp, unsigned long size) dst = fncpy(sram, funcp, size); - set_memory_ro(base, pages); - set_memory_x(base, pages); + set_memory_rox(base, pages); return dst; } @@ -126,8 +125,7 @@ static void __init omap_detect_and_map_sram(void) base = (unsigned long)omap_sram_base; pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE; - set_memory_ro(base, pages); - set_memory_x(base, pages); + set_memory_rox(base, pages); } static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl); diff --git a/arch/arm/mach-omap2/sram.c b/arch/arm/mach-omap2/sram.c index 39cf270da718a..815d390109d21 100644 --- a/arch/arm/mach-omap2/sram.c +++ b/arch/arm/mach-omap2/sram.c @@ -14,11 +14,11 @@ #include #include #include +#include #include #include #include -#include #include @@ -96,8 +96,7 @@ void *omap_sram_push(void *funcp, unsigned long size) dst = fncpy(sram, funcp, size); - set_memory_ro(base, pages); - set_memory_x(base, pages); + set_memory_rox(base, pages); return dst; } @@ -217,8 +216,7 @@ static void __init omap2_map_sram(void) base = (unsigned long)omap_sram_base; pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE; - set_memory_ro(base, pages); - set_memory_x(base, pages); + set_memory_rox(base, pages); } static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl, diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index bd7b1a0354594..7a89de3026097 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -20,12 +20,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -134,10 +134,9 @@ void *alloc_insn_page(void) if (!page) return NULL; - if (strict_module_rwx_enabled()) { - set_memory_ro((unsigned long)page, 1); - set_memory_x((unsigned long)page, 1); - } + if (strict_module_rwx_enabled()) + set_memory_rox((unsigned long)page, 1); + return page; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 43628b8480fa1..03579460d0ec4 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -24,10 +24,10 @@ #include #include #include +#include #include -#include #include #include #include @@ -415,8 +415,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) set_vm_flush_reset_perms(trampoline); - set_memory_ro((unsigned long)trampoline, npages); - set_memory_x((unsigned long)trampoline, npages); + set_memory_rox((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: tramp_free(trampoline); 
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index eb8bc82846b99..e7b7ca64acdfa 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -51,7 +52,6 @@ #include #include #include -#include #include #include "common.h" @@ -415,17 +415,12 @@ void *alloc_insn_page(void) return NULL; set_vm_flush_reset_perms(page); - /* - * First make the page read-only, and only then make it executable to - * prevent it from being W+X in between. - */ - set_memory_ro((unsigned long)page, 1); /* * TODO: Once additional kernel code protection mechanisms are set, ensure * that the page was not maliciously altered and it is still zeroed. */ - set_memory_x((unsigned long)page, 1); + set_memory_rox((unsigned long)page, 1); return page; } diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c index a948e95d43754..b71dbbd737382 100644 --- a/drivers/misc/sram-exec.c +++ b/drivers/misc/sram-exec.c @@ -10,9 +10,9 @@ #include #include #include +#include #include -#include #include "sram.h" @@ -106,10 +106,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, dst_cpy = fncpy(dst, src, size); - ret = set_memory_ro((unsigned long)base, pages); - if (ret) - goto error_out; - ret = set_memory_x((unsigned long)base, pages); + ret = set_memory_rox((unsigned long)base, pages); if (ret) goto error_out; diff --git a/include/linux/filter.h b/include/linux/filter.h index efc42a6e3aed0..f0b17aff4e66b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -860,8 +860,7 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); - set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT); - set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT); + set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 369769ce7399d..023ebc67a36cd 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -14,6 +14,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +static inline int set_memory_rox(unsigned long addr, int numpages) +{ + int ret = set_memory_ro(addr, numpages); + if (ret) + return ret; + return set_memory_x(addr, numpages); +} + #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP static inline int set_direct_map_invalid_noflush(struct page *page) { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 84b2d9dba79a7..ece9870cab68e 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -494,8 +494,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, refcount_set(&kvalue->refcnt, 1); bpf_map_inc(map); - set_memory_ro((long)st_map->image, 1); - set_memory_x((long)st_map->image, 1); + set_memory_rox((long)st_map->image, 1); err = st_ops->reg(kdata); if (likely(!err)) { /* Pair with smp_load_acquire() during lookup_elem(). 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 25a54e04560e5..b0525ea252721 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -864,8 +864,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins list_add_tail(&pack->list, &pack_list); set_vm_flush_reset_perms(pack->ptr); - set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); - set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); return pack; } @@ -883,8 +882,7 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) if (ptr) { bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); - set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); - set_memory_x((unsigned long)ptr, size / PAGE_SIZE); + set_memory_rox((unsigned long)ptr, size / PAGE_SIZE); } goto out; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index bf0906e1e2b97..a848922ea6f77 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -468,8 +468,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut if (err < 0) goto out; - set_memory_ro((long)im->image, 1); - set_memory_x((long)im->image, 1); + set_memory_rox((long)im->image, 1); WARN_ON(tr->cur_image && tr->selector == 0); WARN_ON(!tr->cur_image && tr->selector); diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index e78dadfc58290..9ff32324251be 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -124,8 +124,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, if (err < 0) goto out; - set_memory_ro((long)image, 1); - set_memory_x((long)image, 1); + set_memory_rox((long)image, 1); prog_ret = dummy_ops_call_op(image, args); err = dummy_ops_copy_args(args); -- GitLab From 60463628c9e0a8060ac6bef0457b0505c7532c7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Oct 2022 13:19:31 +0200 Subject: [PATCH 609/694] x86/mm: Implement native set_memory_rox() Provide a native implementation of set_memory_rox(), avoiding the double set_memory_ro();set_memory_x(); calls. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/set_memory.h | 3 +++ arch/x86/mm/pat/set_memory.c | 10 ++++++++++ include/linux/set_memory.h | 2 ++ 3 files changed, 15 insertions(+) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index b45c4d27fd46e..a5e89641bd2da 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -6,6 +6,9 @@ #include #include +#define set_memory_rox set_memory_rox +int set_memory_rox(unsigned long addr, int numpages); + /* * The set_memory_* API can be used to change various attributes of a virtual * address range. 
The attributes include: diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 97342c42dda8e..f275605892df9 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2025,6 +2025,16 @@ int set_memory_ro(unsigned long addr, int numpages) return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); } +int set_memory_rox(unsigned long addr, int numpages) +{ + pgprot_t clr = __pgprot(_PAGE_RW); + + if (__supported_pte_mask & _PAGE_NX) + clr.pgprot |= _PAGE_NX; + + return change_page_attr_clear(&addr, numpages, clr, 0); +} + int set_memory_rw(unsigned long addr, int numpages) { return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 023ebc67a36cd..95ac8398ee72d 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -14,6 +14,7 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +#ifndef set_memory_rox static inline int set_memory_rox(unsigned long addr, int numpages) { int ret = set_memory_ro(addr, numpages); @@ -21,6 +22,7 @@ static inline int set_memory_rox(unsigned long addr, int numpages) return ret; return set_memory_x(addr, numpages); } +#endif #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP static inline int set_direct_map_invalid_noflush(struct page *page) -- GitLab From 93b3037a1482758349f3b0431406bcc457ca1cbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 14:04:46 +0100 Subject: [PATCH 610/694] mm: Update ptep_get_lockless()'s comment Improve the comment. Suggested-by: Matthew Wilcox Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.515572025%40infradead.org --- include/linux/pgtable.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962b..c0b29000c3c0c 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -300,15 +300,12 @@ static inline pte_t ptep_get(pte_t *ptep) #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH /* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking any - * locks. For this we would like to load the pointers atomically, but sometimes - * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What - * we do have is the guarantee that a PTE will only either go from not present - * to present, or present to not present or both -- it will not switch to a - * completely different present page without a TLB flush in between; something - * that we are blocking by holding interrupts off. + * For walking the pagetables without holding any locks. Some architectures + * (eg x86-32 PAE) cannot load the entries atomically without using expensive + * instructions. We are guaranteed that a PTE will only either go from not + * present to present, or present to not present -- it will not switch to a + * completely different present page without a TLB flush inbetween; which we + * are blocking by holding interrupts off. 
* * Setting ptes from not present to present goes: * -- GitLab From fbfdec9989e69e0b17aa3bf32fcb22d04cc33301 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:02:29 +0100 Subject: [PATCH 611/694] x86/mm/pae: Make pmd_t similar to pte_t Instead of mucking about with at least 2 different ways of fudging it, do the same thing we do for pte_t. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.580310787%40infradead.org --- arch/x86/include/asm/pgtable-3level.h | 42 +++++++-------------- arch/x86/include/asm/pgtable-3level_types.h | 7 ++++ arch/x86/include/asm/pgtable_64_types.h | 1 + arch/x86/include/asm/pgtable_types.h | 4 +- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 28421a8872093..28556d22feb89 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -87,7 +87,7 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) ret |= ((pmdval_t)*(tmp + 1)) << 32; } - return (pmd_t) { ret }; + return (pmd_t) { .pmd = ret }; } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) @@ -121,12 +121,11 @@ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, ptep->pte_high = 0; } -static inline void native_pmd_clear(pmd_t *pmd) +static inline void native_pmd_clear(pmd_t *pmdp) { - u32 *tmp = (u32 *)pmd; - *tmp = 0; + pmdp->pmd_low = 0; smp_wmb(); - *(tmp + 1) = 0; + pmdp->pmd_high = 0; } static inline void native_pud_clear(pud_t *pudp) @@ -162,25 +161,17 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif -union split_pmd { - struct { - u32 pmd_low; - u32 pmd_high; - }; - pmd_t pmd; -}; - #ifdef CONFIG_SMP static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) { - union split_pmd res, *orig = (union split_pmd *)pmdp; + pmd_t res; /* xchg acts as a barrier before setting of the high bits */ - res.pmd_low = xchg(&orig->pmd_low, 0); - res.pmd_high = orig->pmd_high; - orig->pmd_high = 0; + res.pmd_low = xchg(&pmdp->pmd_low, 0); + res.pmd_high = READ_ONCE(pmdp->pmd_high); + WRITE_ONCE(pmdp->pmd_high, 0); - return res.pmd; + return res; } #else #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) @@ -199,17 +190,12 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * anybody. 
*/ if (!(pmd_val(pmd) & _PAGE_PRESENT)) { - union split_pmd old, new, *ptr; - - ptr = (union split_pmd *)pmdp; - - new.pmd = pmd; - /* xchg acts as a barrier before setting of the high bits */ - old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low); - old.pmd_high = ptr->pmd_high; - ptr->pmd_high = new.pmd_high; - return old.pmd; + old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low); + old.pmd_high = READ_ONCE(pmdp->pmd_high); + WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high); + + return old; } do { diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 56baf43befb49..80911349519e8 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -18,6 +18,13 @@ typedef union { }; pteval_t pte; } pte_t; + +typedef union { + struct { + unsigned long pmd_low, pmd_high; + }; + pmdval_t pmd; +} pmd_t; #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 04f36063ad546..38bf837e35544 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -19,6 +19,7 @@ typedef unsigned long pgdval_t; typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; +typedef struct { pmdval_t pmd; } pmd_t; #ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index aa174fed3a71c..447d4bee25c48 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -361,11 +361,9 @@ static inline pudval_t native_pud_val(pud_t pud) #endif #if CONFIG_PGTABLE_LEVELS > 2 -typedef struct { pmdval_t pmd; } pmd_t; - static inline pmd_t native_make_pmd(pmdval_t val) { - return (pmd_t) { val }; + return (pmd_t) { .pmd = val }; } static inline pmdval_t native_pmd_val(pmd_t pmd) -- GitLab From 0862ff059c9e29f023e617b134f9ea332cae50b8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:05:55 +0100 Subject: [PATCH 612/694] sh/mm: Make pmd_t similar to pte_t Just like 64bit pte_t, have a low/high split in pmd_t. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.645657294%40infradead.org --- arch/sh/include/asm/pgtable-3level.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index cdced80a7ffa3..a889a3a938bab 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -28,9 +28,15 @@ #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) -typedef struct { unsigned long long pmd; } pmd_t; +typedef struct { + struct { + unsigned long pmd_low; + unsigned long pmd_high; + }; + unsigned long long pmd; +} pmd_t; #define pmd_val(x) ((x).pmd) -#define __pmd(x) ((pmd_t) { (x) } ) +#define __pmd(x) ((pmd_t) { .pmd = (x) } ) static inline pmd_t *pud_pgtable(pud_t pud) { -- GitLab From 024d232ae4fcd7a7ce8ea239607d6c1246d7adc8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:16:22 +0100 Subject: [PATCH 613/694] mm: Fix pmd_read_atomic() AFAICT there's no reason to do anything different than what we do for PTEs. Make it so (also affects SH). 
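For readers unfamiliar with the PTE scheme, a simplified, self-contained userspace model of the split low/high lockless read follows; C11 atomics stand in for the kernel's READ_ONCE()/WRITE_ONCE() and smp_rmb()/smp_wmb(), and all names in it are invented for the sketch:

        #include <stdatomic.h>
        #include <stdint.h>

        struct split64 {
                _Atomic uint32_t low;
                _Atomic uint32_t high;
        };

        /* Writer, mirroring native_set_pte(): high half first, then low. */
        void split64_set(struct split64 *p, uint64_t val)
        {
                atomic_store_explicit(&p->high, (uint32_t)(val >> 32), memory_order_release);
                atomic_store_explicit(&p->low, (uint32_t)val, memory_order_release);
        }

        /* Reader: load low, then high, retry if the low half changed. */
        uint64_t split64_get_lockless(struct split64 *p)
        {
                uint32_t lo, hi;

                do {
                        lo = atomic_load_explicit(&p->low, memory_order_acquire);
                        hi = atomic_load_explicit(&p->high, memory_order_acquire);
                } while (lo != atomic_load_explicit(&p->low, memory_order_acquire));

                return ((uint64_t)hi << 32) | lo;
        }

This only models the ordering pattern; the full correctness argument (why a zero low half may be treated as not-present, and why a TLB flush cannot race with the walker) lives in the comments touched by this and the preceding patches.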
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.711181252%40infradead.org --- arch/x86/include/asm/pgtable-3level.h | 56 --------------------------- include/linux/pgtable.h | 47 +++++++++++++++++----- 2 files changed, 37 insertions(+), 66 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 28556d22feb89..94f50b0100a51 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -34,62 +34,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) ptep->pte_low = pte.pte_low; } -#define pmd_read_atomic pmd_read_atomic -/* - * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with - * a "*pmdp" dereference done by GCC. Problem is, in certain places - * where pte_offset_map_lock() is called, concurrent page faults are - * allowed, if the mmap_lock is hold for reading. An example is mincore - * vs page faults vs MADV_DONTNEED. On the page fault side - * pmd_populate() rightfully does a set_64bit(), but if we're reading the - * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen - * because GCC will not read the 64-bit value of the pmd atomically. - * - * To fix this all places running pte_offset_map_lock() while holding the - * mmap_lock in read mode, shall read the pmdp pointer using this - * function to know if the pmd is null or not, and in turn to know if - * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd - * operations. - * - * Without THP if the mmap_lock is held for reading, the pmd can only - * transition from null to not null while pmd_read_atomic() runs. So - * we can always return atomic pmd values with this function. - * - * With THP if the mmap_lock is held for reading, the pmd can become - * trans_huge or none or point to a pte (and in turn become "stable") - * at any time under pmd_read_atomic(). We could read it truly - * atomically here with an atomic64_read() for the THP enabled case (and - * it would be a whole lot simpler), but to avoid using cmpxchg8b we - * only return an atomic pmdval if the low part of the pmdval is later - * found to be stable (i.e. pointing to a pte). We are also returning a - * 'none' (zero) pmdval if the low part of the pmd is zero. - * - * In some cases the high and low part of the pmdval returned may not be - * consistent if THP is enabled (the low part may point to previously - * mapped hugepage, while the high part may point to a more recently - * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only - * needs the low part of the pmd to be read atomically to decide if the - * pmd is unstable or not, with the only exception when the low part - * of the pmd is zero, in which case we return a 'none' pmd. - */ -static inline pmd_t pmd_read_atomic(pmd_t *pmdp) -{ - pmdval_t ret; - u32 *tmp = (u32 *)pmdp; - - ret = (pmdval_t) (*tmp); - if (ret) { - /* - * If the low part is null, we must not read the high part - * or we can end up with a partial pmd. 
- */ - smp_rmb(); - ret |= ((pmdval_t)*(tmp + 1)) << 32; - } - - return (pmd_t) { .pmd = ret }; -} - static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c0b29000c3c0c..765fd4bf420f8 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -298,6 +298,13 @@ static inline pte_t ptep_get(pte_t *ptep) } #endif +#ifndef __HAVE_ARCH_PMDP_GET +static inline pmd_t pmdp_get(pmd_t *pmdp) +{ + return READ_ONCE(*pmdp); +} +#endif + #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH /* * For walking the pagetables without holding any locks. Some architectures @@ -340,15 +347,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep) return pte; } -#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +#define ptep_get_lockless ptep_get_lockless + +#if CONFIG_PGTABLE_LEVELS > 2 +static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) +{ + pmd_t pmd; + + do { + pmd.pmd_low = pmdp->pmd_low; + smp_rmb(); + pmd.pmd_high = pmdp->pmd_high; + smp_rmb(); + } while (unlikely(pmd.pmd_low != pmdp->pmd_low)); + + return pmd; +} +#define pmdp_get_lockless pmdp_get_lockless +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ +#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ + /* * We require that the PTE can be read atomically. */ +#ifndef ptep_get_lockless static inline pte_t ptep_get_lockless(pte_t *ptep) { return ptep_get(ptep); } -#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +#endif + +#ifndef pmdp_get_lockless +static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) +{ + return pmdp_get(pmdp); +} +#endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR @@ -1318,17 +1352,10 @@ static inline int pud_trans_unstable(pud_t *pud) #endif } -#ifndef pmd_read_atomic static inline pmd_t pmd_read_atomic(pmd_t *pmdp) { - /* - * Depend on compiler for an atomic pmd read. NOTE: this is - * only going to work, if the pmdval_t isn't larger than - * an unsigned long. - */ - return *pmdp; + return pmdp_get_lockless(pmdp); } -#endif #ifndef arch_needs_pgtable_deposit #define arch_needs_pgtable_deposit() (false) -- GitLab From 6ca297d4784625de7b041e8451780643cf5751a4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Oct 2022 14:51:44 +0200 Subject: [PATCH 614/694] mm: Rename GUP_GET_PTE_LOW_HIGH Since it no longer applies to only PTEs, rename it to PXX. 
Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.776404066%40infradead.org --- arch/mips/Kconfig | 2 +- arch/sh/Kconfig | 2 +- arch/x86/Kconfig | 2 +- include/linux/pgtable.h | 4 ++-- mm/Kconfig | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b26b77673c2cc..15cb692b0a097 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -46,7 +46,7 @@ config MIPS select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL - select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT + select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT select HAVE_ARCH_COMPILER_H select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB if MIPS_FP_SUPPORT diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 5f220e903e5ab..0665ac0add0b4 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -24,7 +24,7 @@ config SUPERH select GENERIC_PCI_IOMAP if PCI select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD - select GUP_GET_PTE_LOW_HIGH if X2TLB + select GUP_GET_PXX_LOW_HIGH if X2TLB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 67745ceab0dbc..bb1f326ca7280 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -157,7 +157,7 @@ config X86 select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY select GENERIC_VDSO_TIME_NS - select GUP_GET_PTE_LOW_HIGH if X86_PAE + select GUP_GET_PXX_LOW_HIGH if X86_PAE select HARDIRQS_SW_RESEND select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 765fd4bf420f8..7dd3df7425439 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -305,7 +305,7 @@ static inline pmd_t pmdp_get(pmd_t *pmdp) } #endif -#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH +#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH /* * For walking the pagetables without holding any locks. Some architectures * (eg x86-32 PAE) cannot load the entries atomically without using expensive @@ -365,7 +365,7 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) } #define pmdp_get_lockless pmdp_get_lockless #endif /* CONFIG_PGTABLE_LEVELS > 2 */ -#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */ /* * We require that the PTE can be read atomically. diff --git a/mm/Kconfig b/mm/Kconfig index 57e1d8c5b5052..0eabd0beb3453 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1044,7 +1044,7 @@ config GUP_TEST comment "GUP_TEST needs to have DEBUG_FS enabled" depends on !GUP_TEST && !DEBUG_FS -config GUP_GET_PTE_LOW_HIGH +config GUP_GET_PXX_LOW_HIGH bool config ARCH_HAS_PTE_SPECIAL -- GitLab From dab6e717429e5ec795d558a0e9a5337a1ed33a3d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:20:28 +0100 Subject: [PATCH 615/694] mm: Rename pmd_read_atomic() There's no point in having the identical routines for PTE/PMD have different names. 
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.841277397%40infradead.org --- include/linux/pgtable.h | 9 ++------- mm/hmm.c | 2 +- mm/khugepaged.c | 2 +- mm/mapping_dirty_helpers.c | 2 +- mm/mprotect.c | 2 +- mm/userfaultfd.c | 2 +- mm/vmscan.c | 4 ++-- 7 files changed, 9 insertions(+), 14 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 7dd3df7425439..23348528ff369 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1352,11 +1352,6 @@ static inline int pud_trans_unstable(pud_t *pud) #endif } -static inline pmd_t pmd_read_atomic(pmd_t *pmdp) -{ - return pmdp_get_lockless(pmdp); -} - #ifndef arch_needs_pgtable_deposit #define arch_needs_pgtable_deposit() (false) #endif @@ -1383,13 +1378,13 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) */ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) { - pmd_t pmdval = pmd_read_atomic(pmd); + pmd_t pmdval = pmdp_get_lockless(pmd); /* * The barrier will stabilize the pmdval in a register or on * the stack so that it will stop changing under the code. * * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE, - * pmd_read_atomic is allowed to return a not atomic pmdval + * pmdp_get_lockless is allowed to return a not atomic pmdval * (for example pointing to an hugepage that has never been * mapped in the pmd). The below checks will only care about * the low part of the pmd with 32bit PAE x86 anyway, with the diff --git a/mm/hmm.c b/mm/hmm.c index 3850fb625dda1..39cf50de76d78 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -361,7 +361,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, * huge or device mapping one and compute corresponding pfn * values. */ - pmd = pmd_read_atomic(pmdp); + pmd = pmdp_get_lockless(pmdp); barrier(); if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd)) goto again; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 4734315f79407..52237a777ebd5 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -862,7 +862,7 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm, if (!*pmd) return SCAN_PMD_NULL; - pmde = pmd_read_atomic(*pmd); + pmde = pmdp_get_lockless(*pmd); #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* See comments in pmd_none_or_trans_huge_or_clear_bad() */ diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c index 1b0ab8fcfd8bf..175e424b9ab1b 100644 --- a/mm/mapping_dirty_helpers.c +++ b/mm/mapping_dirty_helpers.c @@ -126,7 +126,7 @@ static int clean_record_pte(pte_t *pte, unsigned long addr, static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - pmd_t pmdval = pmd_read_atomic(pmd); + pmd_t pmdval = pmdp_get_lockless(pmd); if (!pmd_trans_unstable(&pmdval)) return 0; diff --git a/mm/mprotect.c b/mm/mprotect.c index 668bfaa6ed2ae..f006bafe338f7 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -294,7 +294,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, */ static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) { - pmd_t pmdval = pmd_read_atomic(pmd); + pmd_t pmdval = pmdp_get_lockless(pmd); /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 3d0fef3980b3f..89a1c9ca06f8b 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -630,7 +630,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm, break; } - dst_pmdval = pmd_read_atomic(dst_pmd); + dst_pmdval = pmdp_get_lockless(dst_pmd); /* * If the dst_pmd is mapped as 
THP don't * override it and just be strict. diff --git a/mm/vmscan.c b/mm/vmscan.c index 04d8b88e52164..88ef873b2d83e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4039,9 +4039,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, /* walk_pte_range() may call get_next_vma() */ vma = args->vma; for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) { - pmd_t val = pmd_read_atomic(pmd + i); + pmd_t val = pmdp_get_lockless(pmd + i); - /* for pmd_read_atomic() */ + /* for pmdp_get_lockless() */ barrier(); next = pmd_addr_end(addr, end); -- GitLab From 1180e732c985ed3c8866d2fd9e02b619848404a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:21:30 +0100 Subject: [PATCH 616/694] mm/gup: Fix the lockless PMD access On architectures where the PTE/PMD is larger than the native word size (i386-PAE for example), READ_ONCE() can do the wrong thing. Use pmdp_get_lockless() just like we use ptep_get_lockless(). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.906110403%40infradead.org --- kernel/events/core.c | 2 +- mm/gup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4ec3717003d55..b0d38424f3f77 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7306,7 +7306,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr) return pud_leaf_size(pud); pmdp = pmd_offset_lockless(pudp, pud, addr); - pmd = READ_ONCE(*pmdp); + pmd = pmdp_get_lockless(pmdp); if (!pmd_present(pmd)) return 0; diff --git a/mm/gup.c b/mm/gup.c index fe195d47de74a..ff8b22327e165 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2808,7 +2808,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo pmdp = pmd_offset_lockless(pudp, pud, addr); do { - pmd_t pmd = READ_ONCE(*pmdp); + pmd_t pmd = pmdp_get_lockless(pmdp); next = pmd_addr_end(addr, end); if (!pmd_present(pmd)) -- GitLab From 7a9b8bdb6af3e19fb8e3dc7a3caf6a9ea1bed8cd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:38:42 +0100 Subject: [PATCH 617/694] x86/mm/pae: Don't (ab)use atomic64 PAE implies CX8, write readable code. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114424.971450128%40infradead.org --- arch/x86/include/asm/pgtable-3level.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 94f50b0100a51..0a1b81dc72c04 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_H #define _ASM_X86_PGTABLE_3LEVEL_H -#include - /* * Intel Physical Address Extension (PAE) Mode - three-level page * tables on PPro+ CPUs. @@ -95,11 +93,12 @@ static inline void pud_clear(pud_t *pudp) #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { - pte_t res; + pte_t old = *ptep; - res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0); + do { + } while (!try_cmpxchg64(&ptep->pte, &old.pte, 0ULL)); - return res; + return old; } #else #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) -- GitLab From f7bcd4617de67a4700a7bd7dc56808b57f1c8748 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Nov 2020 17:40:12 +0100 Subject: [PATCH 618/694] x86/mm/pae: Use WRITE_ONCE() Disallow write-tearing, that would be really unfortunate. 
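For context, a short illustration of the tearing being disallowed (general compiler behaviour, not something observed in this tree):

        /* A plain store leaves the compiler free to split or otherwise
         * rewrite the access, so a concurrent lockless walker could in
         * principle observe a half-updated word:
         */
        ptep->pte_low = pte.pte_low;

        /* WRITE_ONCE() forces a single access of the full width: */
        WRITE_ONCE(ptep->pte_low, pte.pte_low);

The smp_wmb() between the two halves already orders them; WRITE_ONCE() additionally keeps each individual half from being torn.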
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114425.038102604%40infradead.org --- arch/x86/include/asm/pgtable-3level.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 0a1b81dc72c04..d3a24929ddff9 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -27,9 +27,9 @@ */ static inline void native_set_pte(pte_t *ptep, pte_t pte) { - ptep->pte_high = pte.pte_high; + WRITE_ONCE(ptep->pte_high, pte.pte_high); smp_wmb(); - ptep->pte_low = pte.pte_low; + WRITE_ONCE(ptep->pte_low, pte.pte_low); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) @@ -58,16 +58,16 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud) static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep->pte_low = 0; + WRITE_ONCE(ptep->pte_low, 0); smp_wmb(); - ptep->pte_high = 0; + WRITE_ONCE(ptep->pte_high, 0); } static inline void native_pmd_clear(pmd_t *pmdp) { - pmdp->pmd_low = 0; + WRITE_ONCE(pmdp->pmd_low, 0); smp_wmb(); - pmdp->pmd_high = 0; + WRITE_ONCE(pmdp->pmd_high, 0); } static inline void native_pud_clear(pud_t *pudp) -- GitLab From b7301f20105a27112f7ca8040cfb0b0505a32fbd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Nov 2020 12:21:25 +0100 Subject: [PATCH 619/694] x86/mm/pae: Be consistent with pXXp_get_and_clear() Given that ptep_get_and_clear() uses cmpxchg8b, and that should be by far the most common case, there's no point in having an optimized variant for pmd/pud. Introduce the pxx_xchg64() helper to implement the common logic once. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114425.103392961%40infradead.org --- arch/x86/include/asm/pgtable-3level.h | 67 +++++++-------------------- 1 file changed, 17 insertions(+), 50 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index d3a24929ddff9..93c82c6ce6e33 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -90,34 +90,33 @@ static inline void pud_clear(pud_t *pudp) */ } + +#define pxx_xchg64(_pxx, _ptr, _val) ({ \ + _pxx##val_t *_p = (_pxx##val_t *)_ptr; \ + _pxx##val_t _o = *_p; \ + do { } while (!try_cmpxchg64(_p, &_o, (_val))); \ + native_make_##_pxx(_o); \ +}) + #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { - pte_t old = *ptep; - - do { - } while (!try_cmpxchg64(&ptep->pte, &old.pte, 0ULL)); - - return old; + return pxx_xchg64(pte, ptep, 0ULL); } -#else -#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) -#endif -#ifdef CONFIG_SMP static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) { - pmd_t res; - - /* xchg acts as a barrier before setting of the high bits */ - res.pmd_low = xchg(&pmdp->pmd_low, 0); - res.pmd_high = READ_ONCE(pmdp->pmd_high); - WRITE_ONCE(pmdp->pmd_high, 0); + return pxx_xchg64(pmd, pmdp, 0ULL); +} - return res; +static inline pud_t native_pudp_get_and_clear(pud_t *pudp) +{ + return pxx_xchg64(pud, pudp, 0ULL); } #else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) +#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) #endif #ifndef pmdp_establish @@ -141,42 +140,10 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, return old; } - do { - old = 
*pmdp; - } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); - - return old; + return pxx_xchg64(pmd, pmdp, pmd.pmd); } #endif -#ifdef CONFIG_SMP -union split_pud { - struct { - u32 pud_low; - u32 pud_high; - }; - pud_t pud; -}; - -static inline pud_t native_pudp_get_and_clear(pud_t *pudp) -{ - union split_pud res, *orig = (union split_pud *)pudp; - -#ifdef CONFIG_PAGE_TABLE_ISOLATION - pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0)); -#endif - - /* xchg acts as a barrier before setting of the high bits */ - res.pud_low = xchg(&orig->pud_low, 0); - res.pud_high = orig->pud_high; - orig->pud_high = 0; - - return res.pud; -} -#else -#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) -#endif - /* Encode and de-code a swap entry */ #define SWP_TYPE_BITS 5 -- GitLab From 9ee850acd25dc290d3cad2707e99380e372ad490 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Oct 2022 14:11:38 +0200 Subject: [PATCH 620/694] x86_64: Remove pointless set_64bit() usage The use of set_64bit() in X86_64 only code is pretty pointless, seeing how it's a direct assignment. Remove all this nonsense. [nathanchance: unbreak irte] Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114425.168036718%40infradead.org --- arch/um/include/asm/pgtable-3level.h | 8 -------- arch/x86/include/asm/cmpxchg_64.h | 5 ----- drivers/iommu/intel/irq_remapping.c | 13 +++++-------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index cb896e6121c8e..8a5032ec231fd 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -58,11 +58,7 @@ #define pud_populate(mm, pud, pmd) \ set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd))) -#ifdef CONFIG_64BIT -#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval)) -#else #define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) -#endif static inline int pgd_newpage(pgd_t pgd) { @@ -71,11 +67,7 @@ static inline int pgd_newpage(pgd_t pgd) static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } -#ifdef CONFIG_64BIT -#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval)) -#else #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) -#endif static inline void pud_clear (pud_t *pud) { diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 250187ac82484..0d3beb27b7fe4 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -2,11 +2,6 @@ #ifndef _ASM_X86_CMPXCHG_64_H #define _ASM_X86_CMPXCHG_64_H -static inline void set_64bit(volatile u64 *ptr, u64 val) -{ - *ptr = val; -} - #define arch_cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 5962bb5027d06..5d176168bb760 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -173,7 +173,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) if ((irte->pst == 1) || (irte_modified->pst == 1)) { bool ret; @@ -187,11 +186,9 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, * same as the old value. 
*/ WARN_ON(!ret); - } else -#endif - { - set_64bit(&irte->low, irte_modified->low); - set_64bit(&irte->high, irte_modified->high); + } else { + WRITE_ONCE(irte->low, irte_modified->low); + WRITE_ONCE(irte->high, irte_modified->high); } __iommu_flush_cache(iommu, irte, sizeof(*irte)); @@ -249,8 +246,8 @@ static int clear_entries(struct irq_2_iommu *irq_iommu) end = start + (1 << irq_iommu->irte_mask); for (entry = start; entry < end; entry++) { - set_64bit(&entry->low, 0); - set_64bit(&entry->high, 0); + WRITE_ONCE(entry->low, 0); + WRITE_ONCE(entry->high, 0); } bitmap_release_region(iommu->ir_table->bitmap, index, irq_iommu->irte_mask); -- GitLab From d4a72e7fe61a1ea9ad4accf3532411ca685eaead Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Oct 2022 13:36:24 +0200 Subject: [PATCH 621/694] x86/mm/pae: Get rid of set_64bit() Recognise that set_64bit() is a special case of our previously introduced pxx_xchg64(), so use that and get rid of set_64bit(). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114425.233481884%40infradead.org --- arch/x86/include/asm/cmpxchg_32.h | 28 --------------------------- arch/x86/include/asm/pgtable-3level.h | 23 +++++++++++----------- 2 files changed, 12 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 215f5a65790fd..6ba80ce9438dd 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -7,34 +7,6 @@ * you need to test for the feature in boot_cpu_data. */ -/* - * CMPXCHG8B only writes to the target if we had the previous - * value in registers, otherwise it acts as a read and gives us the - * "new previous" value. That is why there is a loop. Preloading - * EDX:EAX is a performance optimization: in the common case it means - * we need only one locked operation. - * - * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very - * least an FPU save and/or %cr0.ts manipulation. - * - * cmpxchg8b must be used with the lock prefix here to allow the - * instruction to be executed atomically. We need to have the reader - * side to see the coherent 64bit value. - */ -static inline void set_64bit(volatile u64 *ptr, u64 value) -{ - u32 low = value; - u32 high = value >> 32; - u64 prev = *ptr; - - asm volatile("\n1:\t" - LOCK_PREFIX "cmpxchg8b %0\n\t" - "jnz 1b" - : "=m" (*ptr), "+A" (prev) - : "b" (low), "c" (high) - : "memory"); -} - #ifdef CONFIG_X86_CMPXCHG64 #define arch_cmpxchg64(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 93c82c6ce6e33..967b135fa2c01 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -19,7 +19,15 @@ pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) -/* Rules for using set_pte: the pte being assigned *must* be +#define pxx_xchg64(_pxx, _ptr, _val) ({ \ + _pxx##val_t *_p = (_pxx##val_t *)_ptr; \ + _pxx##val_t _o = *_p; \ + do { } while (!try_cmpxchg64(_p, &_o, (_val))); \ + native_make_##_pxx(_o); \ +}) + +/* + * Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. 
In places where this is * not possible, use pte_get_and_clear to obtain the old pte @@ -34,12 +42,12 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { - set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); + pxx_xchg64(pte, ptep, native_pte_val(pte)); } static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); + pxx_xchg64(pmd, pmdp, native_pmd_val(pmd)); } static inline void native_set_pud(pud_t *pudp, pud_t pud) @@ -47,7 +55,7 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud) #ifdef CONFIG_PAGE_TABLE_ISOLATION pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd); #endif - set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); + pxx_xchg64(pud, pudp, native_pud_val(pud)); } /* @@ -91,13 +99,6 @@ static inline void pud_clear(pud_t *pudp) } -#define pxx_xchg64(_pxx, _ptr, _val) ({ \ - _pxx##val_t *_p = (_pxx##val_t *)_ptr; \ - _pxx##val_t _o = *_p; \ - do { } while (!try_cmpxchg64(_p, &_o, (_val))); \ - native_make_##_pxx(_o); \ -}) - #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { -- GitLab From eb780dcae02d5a71e6979aa7b8c708dea8597adf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Oct 2022 13:47:29 +0200 Subject: [PATCH 622/694] mm: Remove pointless barrier() after pmdp_get_lockless() pmdp_get_lockless() should itself imply any ordering required. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221022114425.298833095%40infradead.org --- mm/hmm.c | 1 - mm/vmscan.c | 3 --- 2 files changed, 4 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 39cf50de76d78..601a99ce3c84d 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -362,7 +362,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, * values. */ pmd = pmdp_get_lockless(pmdp); - barrier(); if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd)) goto again; diff --git a/mm/vmscan.c b/mm/vmscan.c index 88ef873b2d83e..4936a88bb26a7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4041,9 +4041,6 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) { pmd_t val = pmdp_get_lockless(pmd + i); - /* for pmdp_get_lockless() */ - barrier(); - next = pmd_addr_end(addr, end); if (!pmd_present(val) || is_huge_zero_pmd(val)) { -- GitLab From 2dff2c359e829245bc3d80e42e296876d1f0cf8e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 1 Nov 2022 12:53:18 +0100 Subject: [PATCH 623/694] mm: Convert __HAVE_ARCH_P..P_GET to the new style Since __HAVE_ARCH_* style guards have been depricated in favour of defining the function name onto itself, convert pxxp_get(). 
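The pattern, as a minimal sketch (identifiers match the hunks below; the stand-in body is only for illustration):

        /* architecture header that wants its own version: define the name
         * to itself next to the implementation
         */
        #define ptep_get ptep_get
        static inline pte_t ptep_get(pte_t *ptep)
        {
                return READ_ONCE(*ptep);        /* stand-in body; see the powerpc hunk for a real one */
        }

        /* generic header: supply the fallback only when no override exists */
        #ifndef ptep_get
        static inline pte_t ptep_get(pte_t *ptep)
        {
                return READ_ONCE(*ptep);
        }
        #endif

This replaces the old #define __HAVE_ARCH_PTEP_GET / #ifndef __HAVE_ARCH_PTEP_GET pairing without changing any generated code.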
Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Y2EUEBlQXNgaJgoI@hirez.programming.kicks-ass.net --- arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- include/linux/pgtable.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 0d40b33184ebe..cb1ac02ae8ee0 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -263,7 +263,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p } #ifdef CONFIG_PPC_16K_PAGES -#define __HAVE_ARCH_PTEP_GET +#define ptep_get ptep_get static inline pte_t ptep_get(pte_t *ptep) { pte_basic_t val = READ_ONCE(ptep->pte); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 23348528ff369..70e2a7e06a767 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -291,14 +291,14 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, ptep_get_and_clear(mm, addr, ptep); } -#ifndef __HAVE_ARCH_PTEP_GET +#ifndef ptep_get static inline pte_t ptep_get(pte_t *ptep) { return READ_ONCE(*ptep); } #endif -#ifndef __HAVE_ARCH_PMDP_GET +#ifndef pmdp_get static inline pmd_t pmdp_get(pmd_t *pmdp) { return READ_ONCE(*pmdp); -- GitLab From 82328227db8f0b9b5f77bb5afcd47e59d0e4d08f Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Mon, 16 May 2022 18:52:02 +0000 Subject: [PATCH 624/694] x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE macros Other architectures and the common mm/ use P*D_MASK, and P*D_SIZE. Remove the duplicated P*D_PAGE_MASK and P*D_PAGE_SIZE which are only used in x86/*. Signed-off-by: Pasha Tatashin Signed-off-by: Borislav Petkov Reviewed-by: Anshuman Khandual Acked-by: Mike Rapoport Link: https://lore.kernel.org/r/20220516185202.604654-1-tatashin@google.com --- arch/x86/include/asm/page_types.h | 12 +++--------- arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/mm/mem_encrypt_boot.S | 4 ++-- arch/x86/mm/mem_encrypt_identity.c | 18 +++++++++--------- arch/x86/mm/pat/set_memory.c | 6 +++--- arch/x86/mm/pti.c | 2 +- 7 files changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index a506a411474d4..86bd4311daf8a 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -11,20 +11,14 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) - #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) -/* Cast *PAGE_MASK to a signed type so that it is sign-extended if +/* Cast P*D_MASK to a signed type so that it is sign-extended if virtual addresses are 32-bits but physical addresses are larger (ie, 32-bit PAE). 
*/ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 19a0207e529fe..56a917df410d3 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -504,7 +504,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) } a = aper + iommu_size; - iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; + iommu_size -= round_up(a, PMD_SIZE) - a; if (iommu_size < 64*1024*1024) { pr_warn("PCI-DMA: Warning: Small IOMMU %luMB." diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6a3cfaf6b72ad..387e4b12e823a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -203,7 +203,7 @@ unsigned long __head __startup_64(unsigned long physaddr, load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); /* Is the address not 2M aligned? */ - if (load_delta & ~PMD_PAGE_MASK) + if (load_delta & ~PMD_MASK) for (;;); /* Include the SME encryption mask in the fixup value */ diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 9de3d900bc927..e25288ee33c2d 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -26,7 +26,7 @@ SYM_FUNC_START(sme_encrypt_execute) * RCX - virtual address of the encryption workarea, including: * - stack page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE) - * - intermediate copy buffer (PMD_PAGE_SIZE) + * - intermediate copy buffer (PMD_SIZE) * R8 - physical address of the pagetables to use for encryption */ @@ -123,7 +123,7 @@ SYM_FUNC_START(__enc_copy) wbinvd /* Invalidate any cache entries */ /* Copy/encrypt up to 2MB at a time */ - movq $PMD_PAGE_SIZE, %r12 + movq $PMD_SIZE, %r12 1: cmpq %r12, %r9 jnb 2f diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index f415498d3175c..88cccd65029db 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -93,7 +93,7 @@ struct sme_populate_pgd_data { * section is 2MB aligned to allow for simple pagetable setup using only * PMD entries (see vmlinux.lds.S). 
*/ -static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); +static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; @@ -198,8 +198,8 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); - ppd->vaddr += PMD_PAGE_SIZE; - ppd->paddr += PMD_PAGE_SIZE; + ppd->vaddr += PMD_SIZE; + ppd->paddr += PMD_SIZE; } } @@ -225,11 +225,11 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, vaddr_end = ppd->vaddr_end; /* If start is not 2MB aligned, create PTE entries */ - ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE); __sme_map_range_pte(ppd); /* Create PMD entries */ - ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + ppd->vaddr_end = vaddr_end & PMD_MASK; __sme_map_range_pmd(ppd); /* If end is not 2MB aligned, create PTE entries */ @@ -325,7 +325,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) /* Physical addresses gives us the identity mapped virtual addresses */ kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); + kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start; initrd_start = 0; @@ -355,12 +355,12 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * executable encryption area size: * stack page (PAGE_SIZE) * encryption routine page (PAGE_SIZE) - * intermediate copy buffer (PMD_PAGE_SIZE) + * intermediate copy buffer (PMD_SIZE) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ execute_start = workarea_start; - execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; + execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start; /* @@ -383,7 +383,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * before it is mapped. */ workarea_len = execute_len + pgtable_area_len; - workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); + workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE); /* * Set the address to the start of where newly created pagetable diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index f275605892df9..06eb8910462f1 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -743,11 +743,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) switch (level) { case PG_LEVEL_1G: phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PUD_PAGE_MASK; + offset = virt_addr & ~PUD_MASK; break; case PG_LEVEL_2M: phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PMD_PAGE_MASK; + offset = virt_addr & ~PMD_MASK; break; default: phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; @@ -1037,7 +1037,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, case PG_LEVEL_1G: ref_prot = pud_pgprot(*(pud_t *)kpte); ref_pfn = pud_pfn(*(pud_t *)kpte); - pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + pfninc = PMD_SIZE >> PAGE_SHIFT; lpaddr = address & PUD_MASK; lpinc = PMD_SIZE; /* diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ffe3b3a087fea..78414c6d1b5ed 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -592,7 +592,7 @@ static void pti_set_kernel_image_nonglobal(void) * of the image. 
*/ unsigned long start = PFN_ALIGN(_text); - unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); + unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE); /* * This clears _PAGE_GLOBAL from the entire kernel image. -- GitLab From 80d0969aa7832bfeb287cb22563a1ad08fea937d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 9 Nov 2022 19:51:25 +0300 Subject: [PATCH 625/694] x86/mm: Fix CR3_ADDR_MASK The mask must not include bits above physical address mask. These bits are reserved and can be used for other things. Bits 61 and 62 are used for Linear Address Masking. Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Rick Edgecombe Reviewed-by: Alexander Potapenko Acked-by: Peter Zijlstra (Intel) Tested-by: Alexander Potapenko Link: https://lore.kernel.org/all/20221109165140.9137-2-kirill.shutemov%40linux.intel.com --- arch/x86/include/asm/processor-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 02c2cbda4a74e..a7f3d9100adb6 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -35,7 +35,7 @@ */ #ifdef CONFIG_X86_64 /* Mask off the address space ID and SME encryption bits. */ -#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) +#define CR3_ADDR_MASK __sme_clr(PHYSICAL_PAGE_MASK) #define CR3_PCID_MASK 0xFFFull #define CR3_NOFLUSH BIT_ULL(63) -- GitLab From 5ceeee7571b7628f439ae0444ec41d132558f47e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2022 13:33:50 +0100 Subject: [PATCH 626/694] x86/mm: Add a few comments It's a shame to hide useful comments in Changelogs, add some to the code. Shamelessly stolen from commit: c40a56a7818c ("x86/mm/init: Remove freed kernel image areas from alias mapping") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110125544.460677011%40infradead.org --- arch/x86/mm/pat/set_memory.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 06eb8910462f1..50f81ea1fbad3 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -219,6 +219,23 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end) #ifdef CONFIG_X86_64 +/* + * The kernel image is mapped into two places in the virtual address space + * (addresses without KASLR, of course): + * + * 1. The kernel direct map (0xffff880000000000) + * 2. The "high kernel map" (0xffffffff81000000) + * + * We actually execute out of #2. If we get the address of a kernel symbol, it + * points to #2, but almost all physical-to-virtual translations point to #1. + * + * This is so that we can have both a directmap of all physical memory *and* + * take full advantage of the the limited (s32) immediate addressing range (2G) + * of x86_64. + * + * See Documentation/x86/x86_64/mm.rst for more detail. + */ + static inline unsigned long highmap_start_pfn(void) { return __pa_symbol(_text) >> PAGE_SHIFT; @@ -1626,6 +1643,9 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); +/* + * Check the directmap and "high kernel map" 'aliases'. 
+ */ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; -- GitLab From ef9ab81af6e1f7b7ff589aa1504434aa5915c1df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2022 13:33:54 +0100 Subject: [PATCH 627/694] x86/mm: Untangle __change_page_attr_set_clr(.checkalias) The .checkalias argument to __change_page_attr_set_clr() is overloaded and serves two different purposes: - it inhibits the call to cpa_process_alias() -- as suggested by the name; however, - it also serves as 'primary' indicator for __change_page_attr() ( which in turn also serves as a recursion terminator for cpa_process_alias() ). Untangle these by extending the use of CPA_NO_CHECK_ALIAS to all callsites that currently use .checkalias=0 for this purpose. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110125544.527267183%40infradead.org --- arch/x86/mm/pat/set_memory.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 50f81ea1fbad3..4943f6c5d8d22 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1727,7 +1727,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) if (ret) goto out; - if (checkalias) { + if (checkalias && !(cpa->flags & CPA_NO_CHECK_ALIAS)) { ret = cpa_process_alias(cpa); if (ret) goto out; @@ -1801,18 +1801,12 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa.numpages = numpages; cpa.mask_set = mask_set; cpa.mask_clr = mask_clr; - cpa.flags = 0; + cpa.flags = in_flag; cpa.curpage = 0; cpa.force_split = force_split; - if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) - cpa.flags |= in_flag; - /* No alias checking for _NX bit modifications */ checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; - /* Has caller explicitly disabled alias checking? */ - if (in_flag & CPA_NO_CHECK_ALIAS) - checkalias = 0; ret = __change_page_attr_set_clr(&cpa, checkalias); @@ -2067,11 +2061,9 @@ int set_memory_np(unsigned long addr, int numpages) int set_memory_np_noalias(unsigned long addr, int numpages) { - int cpa_flags = CPA_NO_CHECK_ALIAS; - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), __pgprot(_PAGE_PRESENT), 0, - cpa_flags, NULL); + CPA_NO_CHECK_ALIAS, NULL); } int set_memory_4k(unsigned long addr, int numpages) @@ -2288,7 +2280,7 @@ static int __set_pages_p(struct page *page, int numpages) .numpages = numpages, .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), .mask_clr = __pgprot(0), - .flags = 0}; + .flags = CPA_NO_CHECK_ALIAS }; /* * No alias checking needed for setting present flag. otherwise, @@ -2296,7 +2288,7 @@ static int __set_pages_p(struct page *page, int numpages) * mappings (this adds to complexity if we want to do this from * atomic context especially). Let's keep it simple! */ - return __change_page_attr_set_clr(&cpa, 0); + return __change_page_attr_set_clr(&cpa, 1); } static int __set_pages_np(struct page *page, int numpages) @@ -2307,7 +2299,7 @@ static int __set_pages_np(struct page *page, int numpages) .numpages = numpages, .mask_set = __pgprot(0), .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .flags = 0}; + .flags = CPA_NO_CHECK_ALIAS }; /* * No alias checking needed for setting not present flag. otherwise, @@ -2315,7 +2307,7 @@ static int __set_pages_np(struct page *page, int numpages) * mappings (this adds to complexity if we want to do this from * atomic context especially). Let's keep it simple! 
*/ - return __change_page_attr_set_clr(&cpa, 0); + return __change_page_attr_set_clr(&cpa, 1); } int set_direct_map_invalid_noflush(struct page *page) @@ -2386,7 +2378,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .numpages = numpages, .mask_set = __pgprot(0), .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), - .flags = 0, + .flags = CPA_NO_CHECK_ALIAS, }; WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); @@ -2399,7 +2391,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); - retval = __change_page_attr_set_clr(&cpa, 0); + retval = __change_page_attr_set_clr(&cpa, 1); __flush_tlb_all(); out: @@ -2429,12 +2421,12 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, .numpages = numpages, .mask_set = __pgprot(0), .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .flags = 0, + .flags = CPA_NO_CHECK_ALIAS, }; WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); - retval = __change_page_attr_set_clr(&cpa, 0); + retval = __change_page_attr_set_clr(&cpa, 1); __flush_tlb_all(); return retval; -- GitLab From d597416683d587e940faa35945fba162329b5a71 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2022 13:33:57 +0100 Subject: [PATCH 628/694] x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias() There is a cludge in change_page_attr_set_clr() that inhibits propagating NX changes to the aliases (directmap and highmap) -- this is a cludge twofold: - it also inhibits the primary checks in __change_page_attr(); - it hard depends on single bit changes. The introduction of set_memory_rox() triggered this last issue for clearing both _PAGE_RW and _PAGE_NX. Explicitly ignore _PAGE_NX in cpa_process_alias() instead. Fixes: b38994948567 ("x86/mm: Implement native set_memory_rox()") Reported-by: kernel test robot Debugged-by: Dave Hansen Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110125544.594991716%40infradead.org --- arch/x86/mm/pat/set_memory.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 4943f6c5d8d22..beef774171154 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1669,6 +1669,12 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + /* Directmap always has NX set, do not modify. */ + if (__supported_pte_mask & _PAGE_NX) { + alias_cpa.mask_clr.pgprot &= ~_PAGE_NX; + alias_cpa.mask_set.pgprot &= ~_PAGE_NX; + } + cpa->force_flush_all = 1; ret = __change_page_attr_set_clr(&alias_cpa, 0); @@ -1691,6 +1697,15 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + /* + * [_text, _brk_end) also covers data, do not modify NX except + * in cases where the highmap is the primary target. + */ + if (__supported_pte_mask & _PAGE_NX) { + alias_cpa.mask_clr.pgprot &= ~_PAGE_NX; + alias_cpa.mask_set.pgprot &= ~_PAGE_NX; + } + cpa->force_flush_all = 1; /* * The high mapping range is imprecise, so ignore the @@ -1709,6 +1724,12 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) unsigned long rempages = numpages; int ret = 0; + /* + * No changes, easy! 
+ */ + if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr))) + return ret; + while (rempages) { /* * Store the remaining nr of pages for the large page @@ -1755,7 +1776,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, struct page **pages) { struct cpa_data cpa; - int ret, cache, checkalias; + int ret, cache; memset(&cpa, 0, sizeof(cpa)); @@ -1805,10 +1826,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa.curpage = 0; cpa.force_split = force_split; - /* No alias checking for _NX bit modifications */ - checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; - - ret = __change_page_attr_set_clr(&cpa, checkalias); + ret = __change_page_attr_set_clr(&cpa, 1); /* * Check whether we really changed something: -- GitLab From e996365ee7475805d2a01312532855004e89df84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2022 13:34:00 +0100 Subject: [PATCH 629/694] x86/mm: Rename __change_page_attr_set_clr(.checkalias) Now that the checkalias functionality is taken by CPA_NO_CHECK_ALIAS rename the argument to better match is remaining purpose: primary, matching __change_page_attr(). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110125544.661001508%40infradead.org --- arch/x86/mm/pat/set_memory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index beef774171154..220361ceb9977 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1641,7 +1641,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) return err; } -static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); +static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary); /* * Check the directmap and "high kernel map" 'aliases'. @@ -1718,7 +1718,7 @@ static int cpa_process_alias(struct cpa_data *cpa) return 0; } -static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) +static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary) { unsigned long numpages = cpa->numpages; unsigned long rempages = numpages; @@ -1742,13 +1742,13 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) if (!debug_pagealloc_enabled()) spin_lock(&cpa_lock); - ret = __change_page_attr(cpa, checkalias); + ret = __change_page_attr(cpa, primary); if (!debug_pagealloc_enabled()) spin_unlock(&cpa_lock); if (ret) goto out; - if (checkalias && !(cpa->flags & CPA_NO_CHECK_ALIAS)) { + if (primary && !(cpa->flags & CPA_NO_CHECK_ALIAS)) { ret = cpa_process_alias(cpa); if (ret) goto out; -- GitLab From 80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:00 +0000 Subject: [PATCH 630/694] x86/mm: Recompute physical address for every page of per-CPU CEA mapping Recompute the physical address for each per-CPU page in the CPU entry area, a recent commit inadvertantly modified cea_map_percpu_pages() such that every PTE is mapped to the physical address of the first page. 
Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-2-seanjc@google.com --- arch/x86/mm/cpu_entry_area.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index dff9001e5e125..d831aae94b419 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -97,7 +97,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) early_pfn_to_nid(PFN_DOWN(pa))); for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) - cea_set_pte(cea_vaddr, pa, prot); + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } static void __init percpu_setup_debug_store(unsigned int cpu) -- GitLab From 97650148a15e0b30099d6175ffe278b9f55ec66a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:01 +0000 Subject: [PATCH 631/694] x86/mm: Populate KASAN shadow for entire per-CPU range of CPU entry area Populate a KASAN shadow for the entire possible per-CPU range of the CPU entry area instead of requiring that each individual chunk map a shadow. Mapping shadows individually is error prone, e.g. the per-CPU GDT mapping was left behind, which can lead to not-present page faults during KASAN validation if the kernel performs a software lookup into the GDT. The DS buffer is also likely affected. The motivation for mapping the per-CPU areas on-demand was to avoid mapping the entire 512GiB range that's reserved for the CPU entry area, shaving a few bytes by not creating shadows for potentially unused memory was not a goal. The bug is most easily reproduced by doing a sigreturn with a garbage CS in the sigcontext, e.g. int main(void) { struct sigcontext regs; syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); memset(®s, 0, sizeof(regs)); regs.cs = 0x1d0; syscall(__NR_rt_sigreturn); return 0; } to coerce the kernel into doing a GDT lookup to compute CS.base when reading the instruction bytes on the subsequent #GP to determine whether or not the #GP is something the kernel should handle, e.g. to fixup UMIP violations or to emulate CLI/STI for IOPL=3 applications. BUG: unable to handle page fault for address: fffffbc8379ace00 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 16c03a067 P4D 16c03a067 PUD 15b990067 PMD 15b98f067 PTE 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 3 PID: 851 Comm: r2 Not tainted 6.1.0-rc3-next-20221103+ #432 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kasan_check_range+0xdf/0x190 Call Trace: get_desc+0xb0/0x1d0 insn_get_seg_base+0x104/0x270 insn_fetch_from_user+0x66/0x80 fixup_umip_exception+0xb1/0x530 exc_general_protection+0x181/0x210 asm_exc_general_protection+0x22/0x30 RIP: 0003:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. 
RSP: 0003:0000000000000000 EFLAGS: 00000202 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000001d0 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Reported-by: syzbot+ffb4f000dc2872c93f62@syzkaller.appspotmail.com Suggested-by: Andrey Ryabinin Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-3-seanjc@google.com --- arch/x86/mm/cpu_entry_area.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index d831aae94b419..7c855dffcdc26 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -91,11 +91,6 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) static void __init cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) { - phys_addr_t pa = per_cpu_ptr_to_phys(ptr); - - kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE, - early_pfn_to_nid(PFN_DOWN(pa))); - for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } @@ -195,6 +190,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu) pgprot_t tss_prot = PAGE_KERNEL; #endif + kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE, + early_cpu_to_node(cpu)); + cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); cea_map_percpu_pages(&cea->entry_stack_page, -- GitLab From 7077d2ccb94dafd00b29cc2d601c9f6891648f5b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:02 +0000 Subject: [PATCH 632/694] x86/kasan: Rename local CPU_ENTRY_AREA variables to shorten names Rename the CPU entry area variables in kasan_init() to shorten their names, a future fix will reference the beginning of the per-CPU portion of the CPU entry area, and shadow_cpu_entry_per_cpu_begin is a bit much. No functional change intended. 
Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-4-seanjc@google.com --- arch/x86/mm/kasan_init_64.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index d1416926ad526..ad7872ae10ed6 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -331,7 +331,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) void __init kasan_init(void) { int i; - void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; + void *shadow_cea_begin, *shadow_cea_end; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -372,16 +372,16 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; - shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); - shadow_cpu_entry_begin = (void *)round_down( - (unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); + shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE; + shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin); + shadow_cea_begin = (void *)round_down( + (unsigned long)shadow_cea_begin, PAGE_SIZE); - shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + + shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE); - shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); - shadow_cpu_entry_end = (void *)round_up( - (unsigned long)shadow_cpu_entry_end, PAGE_SIZE); + shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end); + shadow_cea_end = (void *)round_up( + (unsigned long)shadow_cea_end, PAGE_SIZE); kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), @@ -403,9 +403,9 @@ void __init kasan_init(void) kasan_populate_early_shadow( kasan_mem_to_shadow((void *)VMALLOC_END + 1), - shadow_cpu_entry_begin); + shadow_cea_begin); - kasan_populate_early_shadow(shadow_cpu_entry_end, + kasan_populate_early_shadow(shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), -- GitLab From bde258d97409f2a45243cb393a55ea9ecfc7aba5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:03 +0000 Subject: [PATCH 633/694] x86/kasan: Add helpers to align shadow addresses up and down Add helpers to dedup code for aligning shadow address up/down to page boundaries when translating an address to its shadow. No functional change intended. 
Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-5-seanjc@google.com --- arch/x86/mm/kasan_init_64.c | 40 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index ad7872ae10ed6..afc5e129ca7be 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -316,22 +316,33 @@ void __init kasan_early_init(void) kasan_map_early_shadow(init_top_pgt); } +static unsigned long kasan_mem_to_shadow_align_down(unsigned long va) +{ + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); + + return round_down(shadow, PAGE_SIZE); +} + +static unsigned long kasan_mem_to_shadow_align_up(unsigned long va) +{ + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); + + return round_up(shadow, PAGE_SIZE); +} + void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) { unsigned long shadow_start, shadow_end; - shadow_start = (unsigned long)kasan_mem_to_shadow(va); - shadow_start = round_down(shadow_start, PAGE_SIZE); - shadow_end = (unsigned long)kasan_mem_to_shadow(va + size); - shadow_end = round_up(shadow_end, PAGE_SIZE); - + shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va); + shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size); kasan_populate_shadow(shadow_start, shadow_end, nid); } void __init kasan_init(void) { + unsigned long shadow_cea_begin, shadow_cea_end; int i; - void *shadow_cea_begin, *shadow_cea_end; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -372,16 +383,9 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE; - shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin); - shadow_cea_begin = (void *)round_down( - (unsigned long)shadow_cea_begin, PAGE_SIZE); - - shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE + - CPU_ENTRY_AREA_MAP_SIZE); - shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end); - shadow_cea_end = (void *)round_up( - (unsigned long)shadow_cea_end, PAGE_SIZE); + shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); + shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), @@ -403,9 +407,9 @@ void __init kasan_init(void) kasan_populate_early_shadow( kasan_mem_to_shadow((void *)VMALLOC_END + 1), - shadow_cea_begin); + (void *)shadow_cea_begin); - kasan_populate_early_shadow(shadow_cea_end, + kasan_populate_early_shadow((void *)shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), -- GitLab From 1cfaac2400c73378e78182a706be0f3ac8b93cd7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:04 +0000 Subject: [PATCH 634/694] x86/kasan: Populate shadow for shared chunk of the CPU entry area Popuplate the shadow for the shared portion of the CPU entry area, i.e. the read-only IDT mapping, during KASAN initialization. A recent change modified KASAN to map the per-CPU areas on-demand, but forgot to keep a shadow for the common area that is shared amongst all CPUs. Map the common area in KASAN init instead of letting idt_map_in_cea() do the dirty work so that it Just Works in the unlikely event more shared data is shoved into the CPU entry area. 
The bug manifests as a not-present #PF when software attempts to lookup an IDT entry, e.g. when KVM is handling IRQs on Intel CPUs (KVM performs direct CALL to the IRQ handler to avoid the overhead of INTn): BUG: unable to handle page fault for address: fffffbc0000001d8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 16c03a067 P4D 16c03a067 PUD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 5 PID: 901 Comm: repro Tainted: G W 6.1.0-rc3+ #410 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kasan_check_range+0xdf/0x190 vmx_handle_exit_irqoff+0x152/0x290 [kvm_intel] vcpu_run+0x1d89/0x2bd0 [kvm] kvm_arch_vcpu_ioctl_run+0x3ce/0xa70 [kvm] kvm_vcpu_ioctl+0x349/0x900 [kvm] __x64_sys_ioctl+0xb8/0xf0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Reported-by: syzbot+8cdd16fd5a6c0565e227@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110203504.1985010-6-seanjc@google.com --- arch/x86/mm/kasan_init_64.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index afc5e129ca7be..0302491d799d1 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -341,7 +341,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) void __init kasan_init(void) { - unsigned long shadow_cea_begin, shadow_cea_end; + unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end; int i; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -384,6 +384,7 @@ void __init kasan_init(void) } shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); + shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU); shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE); @@ -409,6 +410,15 @@ void __init kasan_init(void) kasan_mem_to_shadow((void *)VMALLOC_END + 1), (void *)shadow_cea_begin); + /* + * Populate the shadow for the shared portion of the CPU entry area. + * Shadows for the per-CPU areas are mapped on-demand, as each CPU's + * area is randomly placed somewhere in the 512GiB range and mapping + * the entire 512GiB range is prohibitively expensive. + */ + kasan_populate_shadow(shadow_cea_begin, + shadow_cea_per_cpu_begin, 0); + kasan_populate_early_shadow((void *)shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); -- GitLab From 3e844d842d49cdbe61a4b338bdd512654179488a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 18 Nov 2022 07:16:16 -0800 Subject: [PATCH 635/694] x86/mm: Ensure forced page table splitting There are a few kernel users like kfence that require 4k pages to work correctly and do not support large mappings. They use set_memory_4k() to break down those large mappings. That, in turn relies on cpa_data->force_split option to indicate to set_memory code that it should split page tables regardless of whether the need to be. But, a recent change added an optimization which would return early if a set_memory request came in that did not change permissions. It did not consult ->force_split and would mistakenly optimize away the splitting that set_memory_4k() needs. This broke kfence. Skip the same-permission optimization when ->force_split is set. 
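For context, set_memory_4k() passes empty protection masks and relies solely
on force_split to request the split, which is why an early bail-out keyed only
on the permission masks skipped exactly the work it asked for. A simplified
sketch of such a caller (following arch/x86/mm/pat/set_memory.c):

	int set_memory_4k(unsigned long addr, int numpages)
	{
		/* No protection bits change; only force_split asks for 4k splitting. */
		return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
						__pgprot(0), 1, 0, NULL);
	}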
Fixes: 127960a05548 ("x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias()") Signed-off-by: Dave Hansen Tested-by: Marco Elver Cc: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/CA+G9fYuFxZTxkeS35VTZMXwQvohu73W3xbZ5NtjebsVvH6hCuA@mail.gmail.com/ --- arch/x86/mm/pat/set_memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 220361ceb9977..0db69514fe291 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1727,7 +1727,8 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary) /* * No changes, easy! */ - if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr))) + if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)) && + !cpa->force_split) return ret; while (rempages) { -- GitLab From bb0a1799bbc498053011729dcac0d9430e7d750a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 24 Nov 2022 15:43:59 +0100 Subject: [PATCH 636/694] rtc: isl12026: drop obsolete dependency on COMPILE_TEST Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), it is possible to test-build any driver which depends on OF on any architecture by explicitly selecting OF. Therefore depending on COMPILE_TEST as an alternative is no longer needed. It is actually better to always build such drivers with OF enabled, so that the test builds are closer to how each driver will actually be built on its intended target. Building them without OF may not test much as the compiler will optimize out potentially large parts of the code. In the worst case, this could even pop false positive warnings. Dropping COMPILE_TEST here improves the quality of our testing and avoids wasting time on non-existent issues. Signed-off-by: Jean Delvare Cc: Alessandro Zummo Cc: Alexandre Belloni Link: https://lore.kernel.org/r/20221124154359.039be06c@endymion.delvare Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index b45fd08d51dc2..2bb640d1521d8 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -433,7 +433,7 @@ config RTC_DRV_ISL12022 config RTC_DRV_ISL12026 tristate "Intersil ISL12026" - depends on OF || COMPILE_TEST + depends on OF help If you say yes here you get support for the Intersil ISL12026 RTC chip. -- GitLab From 0feebdeb3acc0375cd817385d637d27a8a6dca97 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Mon, 5 Dec 2022 11:34:45 +0800 Subject: [PATCH 637/694] rtc: ds1307: use sysfs_emit() to instead of scnprintf() Follow the advice of the Documentation/filesystems/sysfs.rst and show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/202212051134455911470@zte.com.cn Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1307.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 7c2276cf5514a..def9b7f9d9577 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1219,8 +1219,7 @@ static ssize_t frequency_test_show(struct device *dev, regmap_read(ds1307->regmap, M41TXX_REG_CONTROL, &ctrl_reg); - return scnprintf(buf, PAGE_SIZE, (ctrl_reg & M41TXX_BIT_FT) ? "on\n" : - "off\n"); + return sysfs_emit(buf, (ctrl_reg & M41TXX_BIT_FT) ? 
"on\n" : "off\n"); } static DEVICE_ATTR_RW(frequency_test); -- GitLab From 16b26f6027588be4414fb69d665bd07c9cb828e9 Mon Sep 17 00:00:00 2001 From: Wadim Egorov Date: Thu, 8 Dec 2022 14:36:05 +0100 Subject: [PATCH 638/694] rtc: rv3028: Use IRQ flags obtained from device tree if available Make the interrupt pin of the RV3028 usable with GPIO controllers without level type IRQs support, such as the TI Davinci GPIO controller. Therefore, allow the IRQ type to be passed from the device tree if available. Based on commit d4785b46345c ("rtc: pcf2127: use IRQ flags obtained from device tree if available") Signed-off-by: Wadim Egorov Link: https://lore.kernel.org/r/20221208133605.4193907-1-w.egorov@phytec.de Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rv3028.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c index dd170e3efd83e..b0099e26e3b05 100644 --- a/drivers/rtc/rtc-rv3028.c +++ b/drivers/rtc/rtc-rv3028.c @@ -902,9 +902,20 @@ static int rv3028_probe(struct i2c_client *client) return PTR_ERR(rv3028->rtc); if (client->irq > 0) { + unsigned long flags; + + /* + * If flags = 0, devm_request_threaded_irq() will use IRQ flags + * obtained from device tree. + */ + if (dev_fwnode(&client->dev)) + flags = 0; + else + flags = IRQF_TRIGGER_LOW; + ret = devm_request_threaded_irq(&client->dev, client->irq, NULL, rv3028_handle_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, + flags | IRQF_ONESHOT, "rv3028", rv3028); if (ret) { dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n"); -- GitLab From b9354487fb795c144a387e51a9d4d33b710c74f7 Mon Sep 17 00:00:00 2001 From: shaomin Deng Date: Mon, 8 Aug 2022 11:23:54 -0400 Subject: [PATCH 639/694] rtc: remove duplicated words in comments Signed-off-by: shaomin Deng Link: https://lore.kernel.org/r/20220808152354.3641-1-dengshaomin@cdjrlc.com Link: https://lore.kernel.org/r/20220808153454.6844-1-dengshaomin@cdjrlc.com Link: https://lore.kernel.org/r/20220808152822.5012-1-dengshaomin@cdjrlc.com Signed-off-by: Alexandre Belloni --- drivers/rtc/interface.c | 2 +- drivers/rtc/rtc-m41t80.c | 2 +- drivers/rtc/rtc-rs5c372.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 9edd662c69ace..7c30cb3c764d8 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -256,7 +256,7 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) * * This could all instead be done in the lower level driver, * but since more than one lower level RTC implementation needs it, - * then it's probably best best to do it here instead of there.. + * then it's probably best to do it here instead of there.. */ /* Get the "before" timestamp */ diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index d3144ffdebb58..494052dbd39ff 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -692,7 +692,7 @@ static void wdt_disable(void) * @ppos: pointer to the position to write. No seeks allowed * * A write to a watchdog device is defined as a keepalive signal. Any - * write of data will do, as we we don't define content meaning. + * write of data will do, as we don't define content meaning. 
*/ static ssize_t wdt_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index 5047afefcceb7..b4c5d016eca32 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -150,7 +150,7 @@ static int rs5c_get_regs(struct rs5c372 *rs5c) * least 80219 chips; this works around that bug. * * The third method on the other hand doesn't work for the SMBus-only - * configurations, so we use the the first method there, stripping off + * configurations, so we use the first method there, stripping off * the extra register in the process. */ if (rs5c->smbus) { -- GitLab From 2dc5e3fb6e974bc299cdd43cb9e253c8958d0d11 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Sun, 5 Jun 2022 16:35:15 +0800 Subject: [PATCH 640/694] rtc: at91rm9200: Fix syntax errors in comments Delete the redundant word 'is'. Signed-off-by: Xiang wangx Link: https://lore.kernel.org/r/20220605083515.9514-1-wangxiang@cdjrlc.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-at91rm9200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index fe396d27ebb71..e9d17232d0a81 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -130,7 +130,7 @@ static void at91_rtc_write_idr(u32 mask) * * Note that there is still a possibility that the mask is updated * before interrupts have actually been disabled in hardware. The only - * way to be certain would be to poll the IMR-register, which is is + * way to be certain would be to poll the IMR-register, which is * the very register we are trying to emulate. The register read back * is a reasonable heuristic. */ -- GitLab From 3bb23f1f9d5a66e23f643b2318a64d88f2585c8e Mon Sep 17 00:00:00 2001 From: Zhang Jiaming Date: Wed, 22 Jun 2022 16:53:44 +0800 Subject: [PATCH 641/694] rtc: rs5c313: correct some spelling mistakes Change 'modifed' to 'modified'. Change 'Updata' to 'Update'. Change 'Initiatlize' to 'Initialize'. Signed-off-by: Zhang Jiaming Link: https://lore.kernel.org/r/20220622085344.23519-1-jiaming@nfschina.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rs5c313.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c index e98f85f34206f..712a08e9e52db 100644 --- a/drivers/rtc/rtc-rs5c313.c +++ b/drivers/rtc/rtc-rs5c313.c @@ -2,7 +2,7 @@ * Ricoh RS5C313 RTC device/driver * Copyright (C) 2007 Nobuhiro Iwamatsu * - * 2005-09-19 modifed by kogiidena + * 2005-09-19 modified by kogiidena * * Based on the old drivers/char/rs5c313_rtc.c by: * Copyright (C) 2000 Philipp Rumpf @@ -36,7 +36,7 @@ * 1.11a Daniele Bellucci: Audit create_proc_read_entry in rtc_init * 1.12 Venkatesh Pallipadi: Hooks for emulating rtc on HPET base-timer * CONFIG_HPET_EMULATE_RTC - * 1.13 Nobuhiro Iwamatsu: Updata driver. + * 1.13 Nobuhiro Iwamatsu: Update driver. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -280,7 +280,7 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm) while (1) { RS5C313_CEENABLE; /* CE:H */ - /* Initiatlize control reg. 24 hour */ + /* Initialize control reg. 
24 hour */ rs5c313_write_cntreg(0x04); if (!(rs5c313_read_cntreg() & RS5C313_CNTREG_ADJ_BSY)) -- GitLab From 55d5a86618d3b1a768bce01882b74cbbd2651975 Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Tue, 22 Nov 2022 16:50:46 +0800 Subject: [PATCH 642/694] rtc: mxc_v2: Add missing clk_disable_unprepare() The call to clk_disable_unprepare() is left out in the error handling of devm_rtc_allocate_device. Add it back. Fixes: 5490a1e018a4 ("rtc: mxc_v2: fix possible race condition") Signed-off-by: GUO Zihua Link: https://lore.kernel.org/r/20221122085046.21689-1-guozihua@huawei.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-mxc_v2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c index 5e03834016294..f6d2ad91ff7a9 100644 --- a/drivers/rtc/rtc-mxc_v2.c +++ b/drivers/rtc/rtc-mxc_v2.c @@ -336,8 +336,10 @@ static int mxc_rtc_probe(struct platform_device *pdev) } pdata->rtc = devm_rtc_allocate_device(&pdev->dev); - if (IS_ERR(pdata->rtc)) + if (IS_ERR(pdata->rtc)) { + clk_disable_unprepare(pdata->clk); return PTR_ERR(pdata->rtc); + } pdata->rtc->ops = &mxc_rtc_ops; pdata->rtc->range_max = U32_MAX; -- GitLab From e88f319a2546fd7772c726bf3a82a23b0859ddeb Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Tue, 22 Nov 2022 09:47:19 +0800 Subject: [PATCH 643/694] rtc: ds1742: use devm_platform_get_and_ioremap_resource() Convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Minghao Chi Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/202211220947194856561@zte.com.cn Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds1742.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 13d45c697da68..a5026b0514e78 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -158,8 +158,7 @@ static int ds1742_rtc_probe(struct platform_device *pdev) if (!pdata) return -ENOMEM; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ioaddr = devm_ioremap_resource(&pdev->dev, res); + ioaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(ioaddr)) return PTR_ERR(ioaddr); -- GitLab From 61b963b52f59524e27692bc1c14bfb2459e32eb3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Dec 2022 19:20:18 +0100 Subject: [PATCH 644/694] mm/gup_test: free memory allocated via kvcalloc() using kvfree() We have to free via kvfree(), not via kfree(). 
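For background, kvcalloc() may satisfy the allocation from either the slab
allocator or vmalloc, so only kvfree() is safe for releasing it. A minimal
sketch of the pairing rule (simplified, kernel-style C; the example names are
illustrative):

	#include <linux/mm.h>
	#include <linux/slab.h>		/* kvcalloc(), kvfree() */

	static int example_alloc(unsigned long nr_pages)
	{
		struct page **pages;

		/* May return kmalloc()'d *or* vmalloc()'d memory. */
		pages = kvcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
		if (!pages)
			return -ENOMEM;

		/* ... use pages ... */

		/* kfree() would be wrong if the vmalloc fallback was taken. */
		kvfree(pages);
		return 0;
	}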
Link: https://lkml.kernel.org/r/20221212182018.264900-1-david@redhat.com Fixes: c77369b437f9 ("mm/gup_test: start/stop/read functionality for PIN LONGTERM test") Signed-off-by: David Hildenbrand Reported-by: kernel test robot Reported-by: Julia Lawall Signed-off-by: Andrew Morton --- mm/gup_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/gup_test.c b/mm/gup_test.c index 33f431e0da60b..8ae7307a1bb6e 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -214,7 +214,7 @@ static inline void pin_longterm_test_stop(void) if (pin_longterm_test_nr_pages) unpin_user_pages(pin_longterm_test_pages, pin_longterm_test_nr_pages); - kfree(pin_longterm_test_pages); + kvfree(pin_longterm_test_pages); pin_longterm_test_pages = NULL; pin_longterm_test_nr_pages = 0; } @@ -255,7 +255,7 @@ static inline int pin_longterm_test_start(unsigned long arg) fast = !!(args.flags & PIN_LONGTERM_TEST_FLAG_USE_FAST); if (!fast && mmap_read_lock_killable(current->mm)) { - kfree(pages); + kvfree(pages); return -EINTR; } -- GitLab From d98c86b9f7a4e1f5a7ead8ba5743952267f9e320 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Tue, 25 Oct 2022 17:38:14 +0000 Subject: [PATCH 645/694] maple_tree: fix mas_find_rev() comment mas_find_rev() uses mas_prev_entry(), not mas_next_entry(), correct comment. Link: https://lkml.kernel.org/r/20221025173756.2719616-1-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 3fe1491d2bf98..fe3947b800690 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -6062,7 +6062,7 @@ void *mas_find_rev(struct ma_state *mas, unsigned long min) if (mas->index < min) return NULL; - /* Retries on dead nodes handled by mas_next_entry */ + /* Retries on dead nodes handled by mas_prev_entry */ return mas_prev_entry(mas, min); } EXPORT_SYMBOL_GPL(mas_find_rev); -- GitLab From 9102b78b6f6ae6af3557114c265c266b312c1319 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Tue, 25 Oct 2022 17:37:23 +0000 Subject: [PATCH 646/694] maple_tree: update copyright dates for test code Add the span to the year of the development. Link: https://lkml.kernel.org/r/20221025173709.2718725-1-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Matthew Wilcox Cc: Joe Perches Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 2e91973fbaa66..81fa7ec2e66a1 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * maple_tree.c: Userspace shim for maple tree test-suite - * Copyright (c) 2018 Liam R. Howlett + * maple_tree.c: Userspace testing for maple tree test-suite + * Copyright (c) 2018-2022 Oracle Corporation + * Author: Liam R. Howlett * * Any tests that require internal knowledge of the tree or threads and other * difficult to handle in kernel tests. -- GitLab From 56a61617dd2276cbc56a6c868599716386d70041 Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Thu, 27 Oct 2022 17:50:24 +0800 Subject: [PATCH 647/694] mm: use stack_depot for recording kmemleak's backtrace Using stack_depot to record kmemleak's backtrace which has been implemented on slub for reducing redundant information. 
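For background, the stackdepot API stores each unique backtrace once and hands
back a small handle that can be resolved later. A minimal usage sketch
(simplified; kmemleak's locking and error handling are omitted, and the helper
names here are illustrative):

	#include <linux/gfp.h>
	#include <linux/kernel.h>
	#include <linux/printk.h>
	#include <linux/stackdepot.h>
	#include <linux/stacktrace.h>

	#define MAX_TRACE 16

	/* Capture the current backtrace and deduplicate it in the depot. */
	static depot_stack_handle_t record_backtrace(void)
	{
		unsigned long entries[MAX_TRACE];
		unsigned int nr_entries;

		/* Assumes stack_depot_init() has already been called. */
		nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
		return stack_depot_save(entries, nr_entries, GFP_NOWAIT);
	}

	/* Resolve the handle back into frames when the trace is reported. */
	static void print_backtrace(depot_stack_handle_t handle)
	{
		unsigned long *entries;
		unsigned int nr_entries, i;

		nr_entries = stack_depot_fetch(handle, &entries);
		for (i = 0; i < nr_entries; i++)
			pr_info("  [<%p>] %pS\n", (void *)entries[i],
				(void *)entries[i]);
	}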
[akpm@linux-foundation.org: fix build - remove now-unused __save_stack_trace()] [zhaoyang.huang@unisoc.com: v3] Link: https://lkml.kernel.org/r/1667101354-4669-1-git-send-email-zhaoyang.huang@unisoc.com [akpm@linux-foundation.org: fix v3 layout oddities] [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/1666864224-27541-1-git-send-email-zhaoyang.huang@unisoc.com Signed-off-by: Zhaoyang Huang Acked-by: Catalin Marinas Cc: ke.wang Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Zhaoyang Huang Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 1 + mm/kmemleak.c | 46 ++++++++++++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a93c45ebc3205..4a23b37d15ffc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -728,6 +728,7 @@ config DEBUG_KMEMLEAK select STACKTRACE if STACKTRACE_SUPPORT select KALLSYMS select CRC32 + select STACKDEPOT help Say Y here if you want to enable the memory leak detector. The memory allocation/freeing is traced in a way diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 2673329043545..dc7db6a2a0ddd 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -79,6 +79,7 @@ #include #include #include +#include #include #include #include @@ -159,8 +160,7 @@ struct kmemleak_object { u32 checksum; /* memory ranges to be scanned inside an object (empty for all) */ struct hlist_head area_list; - unsigned long trace[MAX_TRACE]; - unsigned int trace_len; + depot_stack_handle_t trace_handle; unsigned long jiffies; /* creation timestamp */ pid_t pid; /* pid of the current task */ char comm[TASK_COMM_LEN]; /* executable name */ @@ -346,18 +346,21 @@ static void print_unreferenced(struct seq_file *seq, struct kmemleak_object *object) { int i; + unsigned long *entries; + unsigned int nr_entries; unsigned int msecs_age = jiffies_to_msecs(jiffies - object->jiffies); + nr_entries = stack_depot_fetch(object->trace_handle, &entries); warn_or_seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n", - object->pointer, object->size); + object->pointer, object->size); warn_or_seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu (age %d.%03ds)\n", - object->comm, object->pid, object->jiffies, - msecs_age / 1000, msecs_age % 1000); + object->comm, object->pid, object->jiffies, + msecs_age / 1000, msecs_age % 1000); hex_dump_object(seq, object); warn_or_seq_printf(seq, " backtrace:\n"); - for (i = 0; i < object->trace_len; i++) { - void *ptr = (void *)object->trace[i]; + for (i = 0; i < nr_entries; i++) { + void *ptr = (void *)entries[i]; warn_or_seq_printf(seq, " [<%p>] %pS\n", ptr, ptr); } } @@ -370,15 +373,16 @@ static void print_unreferenced(struct seq_file *seq, static void dump_object_info(struct kmemleak_object *object) { pr_notice("Object 0x%08lx (size %zu):\n", - object->pointer, object->size); + object->pointer, object->size); pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", - object->comm, object->pid, object->jiffies); + object->comm, object->pid, object->jiffies); pr_notice(" min_count = %d\n", object->min_count); pr_notice(" count = %d\n", object->count); pr_notice(" flags = 0x%x\n", object->flags); pr_notice(" checksum = %u\n", object->checksum); pr_notice(" backtrace:\n"); - stack_trace_print(object->trace, object->trace_len, 4); + if (object->trace_handle) + stack_depot_print(object->trace_handle); } /* @@ -591,12 +595,18 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali return object; } -/* - * Save stack trace to the 
given array of MAX_TRACE size. - */ -static int __save_stack_trace(unsigned long *trace) +static noinline depot_stack_handle_t set_track_prepare(void) { - return stack_trace_save(trace, MAX_TRACE, 2); + depot_stack_handle_t trace_handle; + unsigned long entries[MAX_TRACE]; + unsigned int nr_entries; + + if (!kmemleak_initialized) + return 0; + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3); + trace_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT); + + return trace_handle; } /* @@ -653,7 +663,7 @@ static void __create_object(unsigned long ptr, size_t size, } /* kernel backtrace */ - object->trace_len = __save_stack_trace(object->trace); + object->trace_handle = set_track_prepare(); raw_spin_lock_irqsave(&kmemleak_lock, flags); @@ -692,7 +702,6 @@ static void __create_object(unsigned long ptr, size_t size, rb_link_node(&object->rb_node, rb_parent, link); rb_insert_color(&object->rb_node, is_phys ? &object_phys_tree_root : &object_tree_root); - list_add_tail_rcu(&object->object_list, &object_list); out: raw_spin_unlock_irqrestore(&kmemleak_lock, flags); @@ -1091,7 +1100,7 @@ void __ref kmemleak_update_trace(const void *ptr) } raw_spin_lock_irqsave(&object->lock, flags); - object->trace_len = __save_stack_trace(object->trace); + object->trace_handle = set_track_prepare(); raw_spin_unlock_irqrestore(&object->lock, flags); put_object(object); @@ -2084,6 +2093,7 @@ void __init kmemleak_init(void) if (kmemleak_error) return; + stack_depot_init(); jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); -- GitLab From 3a6f33d86baa8103c80f62edd9393e9f7bf25d72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 8 Nov 2022 10:43:22 +0100 Subject: [PATCH 648/694] mm/kmemleak: use %pK to display kernel pointers in backtrace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, %p is used to display kernel pointers in backtrace which result in a hashed value that is not usable to correlate the address for debug. Use %pK which will respect the kptr_restrict configuration value and thus allow to extract meaningful information from the backtrace. Link: https://lkml.kernel.org/r/20221108094322.73492-1-clement.leger@bootlin.com Signed-off-by: Clément Léger Cc: Alexandre Belloni Cc: Catalin Marinas Cc: Thomas Petazzoni Signed-off-by: Andrew Morton --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index dc7db6a2a0ddd..92f670edbf518 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -361,7 +361,7 @@ static void print_unreferenced(struct seq_file *seq, for (i = 0; i < nr_entries; i++) { void *ptr = (void *)entries[i]; - warn_or_seq_printf(seq, " [<%p>] %pS\n", ptr, ptr); + warn_or_seq_printf(seq, " [<%pK>] %pS\n", ptr, ptr); } } -- GitLab From 8b777594d2341a82f00b57c020f8af05bded1178 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 13 Dec 2022 09:07:31 -0800 Subject: [PATCH 649/694] MAINTAINERS: zram: zsmalloc: Add an additional co-maintainer Move Sergey to co-maintainer for zram/zsmalloc since he has helped to contribute/review those areas actively for eight years, which is quite helpful. Since Nitin has been inactive for several years, it's time to move his name into CREDITS. 
Link: https://lkml.kernel.org/r/20221213170731.796121-1-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Nitin Gupta Signed-off-by: Andrew Morton --- CREDITS | 4 ++++ MAINTAINERS | 6 ++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CREDITS b/CREDITS index 198f675c419e3..4e302a459ddf0 100644 --- a/CREDITS +++ b/CREDITS @@ -1439,6 +1439,10 @@ N: Justin Guyett E: jguyett@andrew.cmu.edu D: via-rhine net driver hacking +N: Nitin Gupta +E: ngupta@vflare.org +D: zsmalloc memory allocator and zram block device driver + N: Danny ter Haar E: dth@cistron.nl D: /proc/cpuinfo, reboot on panic , kernel pre-patch tester ;) diff --git a/MAINTAINERS b/MAINTAINERS index 096ae475e21cd..5c63d48847ab0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23002,8 +23002,7 @@ F: drivers/media/pci/zoran/ ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER M: Minchan Kim -M: Nitin Gupta -R: Sergey Senozhatsky +M: Sergey Senozhatsky L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/admin-guide/blockdev/zram.rst @@ -23016,8 +23015,7 @@ F: drivers/tty/serial/zs.* ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR M: Minchan Kim -M: Nitin Gupta -R: Sergey Senozhatsky +M: Sergey Senozhatsky L: linux-mm@kvack.org S: Maintained F: Documentation/mm/zsmalloc.rst -- GitLab From a7ebbbb159c181c696770feeb89bf0334aaff6d8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Aug 2022 08:03:29 +0000 Subject: [PATCH 650/694] fault-injection: allow stacktrace filter for x86-64 This patchset allow fault injection to run on x86_64 and makes stacktrace filter work as expected. With this, we can test a device driver module with fault injection more easily. This patch (of 4): FAULT_INJECTION_STACKTRACE_FILTER option was apparently disallowed on x86_64 because of problems with the stack unwinder: commit 6d690dcac92a84f98fd774862628ff871b713660 Author: Akinobu Mita Date: Sat May 12 10:36:53 2007 -0700 fault injection: disable stacktrace filter for x86-64 However, there is no problems whatsoever with this today. Let's allow it again. Link: https://lkml.kernel.org/r/20220817080332.1052710-1-weiyongjun1@huawei.com Link: https://lkml.kernel.org/r/20220817080332.1052710-2-weiyongjun1@huawei.com Signed-off-by: Wei Yongjun Cc: Akinobu Mita Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Kees Cook Cc: Nick Desaulniers Cc: Josh Poimboeuf Cc: Dan Williams Cc: Miguel Ojeda Cc: Isabella Basso Cc: Vlastimil Babka Cc: Rasmus Villemoes Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a93c45ebc3205..5d19278f53c69 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1982,7 +1982,6 @@ config FAIL_SUNRPC config FAULT_INJECTION_STACKTRACE_FILTER bool "stacktrace filter for fault-injection capabilities" depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT - depends on !X86_64 select STACKTRACE depends on FRAME_POINTER || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86 help -- GitLab From 4acb9e5139f20c79eb08a95dc5a28186ae7a5088 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Aug 2022 08:03:30 +0000 Subject: [PATCH 651/694] fault-injection: skip stacktrace filtering by default If FAULT_INJECTION_STACKTRACE_FILTER is enabled, the depth is default to 32. This means fail_stacktrace() will iter each entry's stacktrace, even if filter is not configured. This patch changes to quick return from fail_stacktrace() if stacktrace filter is not set. 
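The "filter is not set" condition boils down to the require/reject address
windows still holding their defaults. A simplified sketch of that check
(illustrative only; the helper name is made up, field names follow
struct fault_attr, and the actual change is in the diff below):

	#include <linux/fault-inject.h>
	#include <linux/kernel.h>

	static bool stack_filter_unconfigured(const struct fault_attr *attr)
	{
		/* Defaults: require window covers everything, reject window is empty. */
		return attr->require_start == 0 && attr->require_end == ULONG_MAX &&
		       attr->reject_start == 0 && attr->reject_end == 0;
	}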
Link: https://lkml.kernel.org/r/20220817080332.1052710-3-weiyongjun1@huawei.com Signed-off-by: Wei Yongjun Cc: Akinobu Mita Cc: Dan Williams Cc: Isabella Basso Cc: Josh Poimboeuf Cc: Kees Cook Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/fault-inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 1421818c9ef75..fecc4d8ca32ae 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -71,7 +71,7 @@ static bool fail_stacktrace(struct fault_attr *attr) int n, nr_entries; bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX); - if (depth == 0) + if (depth == 0 || (found && !attr->reject_start && !attr->reject_end)) return found; nr_entries = stack_trace_save(entries, depth, 1); -- GitLab From 0199907474d402809319ada802b50643625914f9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Aug 2022 08:03:31 +0000 Subject: [PATCH 652/694] fault-injection: make some stack filter attrs more readable Attributes of stack filter are show as unsigned decimal, such as 'require-start', 'require-end'. This patch change to show them as unsigned hexadecimal for more readable. Before: $ echo 0xffffffffc0257000 > /sys/kernel/debug/failslab/require-start $ cat /sys/kernel/debug/failslab/require-start 18446744072638263296 After: $ echo 0xffffffffc0257000 > /sys/kernel/debug/failslab/require-start $ cat /sys/kernel/debug/failslab/require-start 0xffffffffc0257000 [wangyufen@huawei.com: use debugfs_create_xul() instead of debugfs_create_xl()] Link: https://lkml.kernel.org/r/1664331299-4976-1-git-send-email-wangyufen@huawei.com Link: https://lkml.kernel.org/r/20220817080332.1052710-4-weiyongjun1@huawei.com Signed-off-by: Wei Yongjun Signed-off-by: Wang Yufen Reviewed-by: Akinobu Mita Cc: Dan Williams Cc: Isabella Basso Cc: Josh Poimboeuf Cc: Kees Cook Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/fault-inject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index fecc4d8ca32ae..d7819cacf8901 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -226,10 +226,10 @@ struct dentry *fault_create_debugfs_attr(const char *name, #ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER debugfs_create_stacktrace_depth("stacktrace-depth", mode, dir, &attr->stacktrace_depth); - debugfs_create_ul("require-start", mode, dir, &attr->require_start); - debugfs_create_ul("require-end", mode, dir, &attr->require_end); - debugfs_create_ul("reject-start", mode, dir, &attr->reject_start); - debugfs_create_ul("reject-end", mode, dir, &attr->reject_end); + debugfs_create_xul("require-start", mode, dir, &attr->require_start); + debugfs_create_xul("require-end", mode, dir, &attr->require_end); + debugfs_create_xul("reject-start", mode, dir, &attr->reject_start); + debugfs_create_xul("reject-end", mode, dir, &attr->reject_end); #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ attr->dname = dget(dir); -- GitLab From f9eeef5918bbe1f2545d36280330dced25d6cf97 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Aug 2022 08:03:32 +0000 Subject: [PATCH 653/694] fault-injection: make stacktrace filter works as expected stacktrace filter is checked after others, such as fail-nth, interval and probability. This make it doesn't work well as expected. 
Fix this by running the stacktrace filter before the other filters. This
also speeds up fault-injection testing of driver modules.

Link: https://lkml.kernel.org/r/20220817080332.1052710-5-weiyongjun1@huawei.com
Signed-off-by: Wei Yongjun
Cc: Akinobu Mita
Cc: Dan Williams
Cc: Isabella Basso
Cc: Josh Poimboeuf
Cc: Kees Cook
Cc: Miguel Ojeda
Cc: Nathan Chancellor
Cc: Nick Desaulniers
Cc: Peter Zijlstra
Cc: Rasmus Villemoes
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
---
 lib/fault-inject.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index d7819cacf8901..6cff320c4eb40 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -102,10 +102,16 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
 
 bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
 {
+	bool stack_checked = false;
+
 	if (in_task()) {
 		unsigned int fail_nth = READ_ONCE(current->fail_nth);
 
 		if (fail_nth) {
+			if (!fail_stacktrace(attr))
+				return false;
+
+			stack_checked = true;
 			fail_nth--;
 			WRITE_ONCE(current->fail_nth, fail_nth);
 			if (!fail_nth)
@@ -125,6 +131,9 @@ bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
 	if (atomic_read(&attr->times) == 0)
 		return false;
 
+	if (!stack_checked && !fail_stacktrace(attr))
+		return false;
+
 	if (atomic_read(&attr->space) > size) {
 		atomic_sub(size, &attr->space);
 		return false;
@@ -139,9 +148,6 @@ bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
 	if (attr->probability <= get_random_u32_below(100))
 		return false;
 
-	if (!fail_stacktrace(attr))
-		return false;
-
 fail:
 	if (!(flags & FAULT_NOWARN))
 		fail_dump(attr);
-- 
GitLab

From dba8eb83af9dd757ef645b52200775e86883d858 Mon Sep 17 00:00:00 2001
From: Chun-Jie Chen
Date: Fri, 14 Oct 2022 18:20:29 +0800
Subject: [PATCH 654/694] soc: mediatek: pm-domains: Fix the power glitch issue

A power reset may generate an unexpected signal. To avoid this glitch,
enable isolation first so that the signal stays stable when the power
reset is triggered.
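In other words, the power-off path should isolate the domain and gate its
clocks before asserting the reset. A simplified sketch of the intended
ordering (helper and bit names follow the driver code in the diff below;
surrounding steps omitted):

	/* 1. Isolate the domain so unstable outputs cannot propagate. */
	regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_ISO_BIT);
	/* 2. Gate the subsystem clocks. */
	regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_CLK_DIS_BIT);
	/* 3. Only then assert the reset. */
	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT);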
Fixes: 59b644b01cf4 ("soc: mediatek: Add MediaTek SCPSYS power domains")
Signed-off-by: Chun-Jie Chen
Signed-off-by: Allen-KH Cheng
Reviewed-by: Chen-Yu Tsai
Reviewed-by: Miles Chen
Reviewed-by: AngeloGioacchino Del Regno
Link: https://lore.kernel.org/r/20221014102029.1162-1-allen-kh.cheng@mediatek.com
Signed-off-by: Matthias Brugger
---
 drivers/soc/mediatek/mtk-pm-domains.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c
index 09e3c38b84664..474b272f9b02d 100644
--- a/drivers/soc/mediatek/mtk-pm-domains.c
+++ b/drivers/soc/mediatek/mtk-pm-domains.c
@@ -275,9 +275,9 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
 	clk_bulk_disable_unprepare(pd->num_subsys_clks, pd->subsys_clks);
 
 	/* subsys power off */
-	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT);
 	regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_ISO_BIT);
 	regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_CLK_DIS_BIT);
+	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT);
 	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_ON_2ND_BIT);
 	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_ON_BIT);
 
--
GitLab

From e4a4175201014c0222f6bab1895a17b3d1b92f08 Mon Sep 17 00:00:00 2001
From: Macpaul Lin
Date: Fri, 11 Nov 2022 17:55:40 +0800
Subject: [PATCH 655/694] arm64: dts: mediatek: mt8195-demo: fix the memory size of node secmon

The size of the device tree node secmon (bl31_secmon_reserved) was
incorrect. It should be increased to 2MiB (0x200000).

The original setting causes abnormal behavior, because trusted-firmware-a
and related firmware do not load correctly. The incorrect behavior may
vary depending on the software stack. For example, it causes a build
error in some Yocto projects, because the build checks whether there is
enough reserved memory to load trusted-firmware-a.

When mt8195-demo.dts was first sent upstream, the size of BL31 was small,
since BL31 only supported a basic set of functions and modules while the
board was in the early development stage. Now BL31 includes more
coprocessor firmware and more mature functions, so its size has grown in
real applications. Based on the values reported by customers, reserving
2MiB for BL31 should be enough for the next 2 or 3 years.
Cc: stable@vger.kernel.org # v5.19 Fixes: 6147314aeedc ("arm64: dts: mediatek: Add device-tree for MT8195 Demo board") Signed-off-by: Macpaul Lin Reviewed-by: Miles Chen Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221111095540.28881-1-macpaul.lin@mediatek.com Signed-off-by: Matthias Brugger --- arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index 4fbd99eb496a2..dec85d2548384 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -56,10 +56,10 @@ #size-cells = <2>; ranges; - /* 192 KiB reserved for ARM Trusted Firmware (BL31) */ + /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ bl31_secmon_reserved: secmon@54600000 { no-map; - reg = <0 0x54600000 0x0 0x30000>; + reg = <0 0x54600000 0x0 0x200000>; }; /* 12 MiB reserved for OP-TEE (BL32) -- GitLab From ad2631b5645a1d0ca9bf6fecf71f77e3b0071ee5 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 27 Sep 2022 12:11:19 +0200 Subject: [PATCH 656/694] arm64: dts: mt8183: Fix Mali GPU clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The actual clock feeding into the Mali GPU on the MT8183 is from the clock gate in the MFGCFG block, not CLK_TOP_MFGPLL_CK from the TOPCKGEN block, which itself is simply a pass-through placeholder for the MFGPLL in the APMIXEDSYS block. Fix the hardware description with the correct clock reference. Fixes: a8168cebf1bc ("arm64: dts: mt8183: Add node for the Mali GPU") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: AngeloGioacchino Del Regno Tested-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20220927101128.44758-2-angelogioacchino.delregno@collabora.com Signed-off-by: Matthias Brugger --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index a70b669c49baa..402136bfd5350 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1678,7 +1678,7 @@ ; interrupt-names = "job", "mmu", "gpu"; - clocks = <&topckgen CLK_TOP_MFGPLL_CK>; + clocks = <&mfgcfg CLK_MFG_BG3D>; power-domains = <&spm MT8183_POWER_DOMAIN_MFG_CORE0>, -- GitLab From 980411a4d1bb925d28cd9e8d8301dc982ece788d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 16 Dec 2022 12:43:12 +1100 Subject: [PATCH 657/694] powerpc/code-patching: Fix oops with DEBUG_VM enabled Nathan reported that the new per-cpu mm patching oopses if DEBUG_VM is enabled: ------------[ cut here ]------------ kernel BUG at arch/powerpc/mm/pgtable.c:333! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc2+ #1 Hardware name: IBM PowerNV (emulated by qemu) POWER9 0x4e1200 opal:v7.0 PowerNV ... NIP assert_pte_locked+0x180/0x1a0 LR assert_pte_locked+0x170/0x1a0 Call Trace: 0x60000000 (unreliable) patch_instruction+0x618/0x6d0 arch_prepare_kprobe+0xfc/0x2d0 register_kprobe+0x520/0x7c0 arch_init_kprobes+0x28/0x3c init_kprobes+0x108/0x184 do_one_initcall+0x60/0x2e0 kernel_init_freeable+0x1f0/0x3e0 kernel_init+0x34/0x1d0 ret_from_kernel_thread+0x5c/0x64 It's caused by the assert_spin_locked() failing in assert_pte_locked(). 
The assert fails because the PTE was unlocked in text_area_cpu_up_mm(), and never relocked. The PTE page shouldn't be freed, the patching_mm is only used for patching on this CPU, only that single PTE is ever mapped, and it's only unmapped at CPU offline. In fact assert_pte_locked() has a special case to ignore init_mm entirely, and the patching_mm is more-or-less like init_mm, so possibly the check could be skipped for patching_mm too. But for now be conservative, and use the proper PTE accessors at patching time, so that the PTE lock is held while the PTE is used. That also avoids the warning in assert_pte_locked(). With that it's no longer necessary to save the PTE in cpu_patching_context for the mm_patch_enabled() case. Fixes: c28c15b6d28a ("powerpc/code-patching: Use temporary mm for Radix MMU") Reported-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221216125913.990972-1-mpe@ellerman.id.au --- arch/powerpc/lib/code-patching.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 73ce4b90bb1b8..b00112d7ad467 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -178,7 +178,6 @@ static int text_area_cpu_up_mm(unsigned int cpu) this_cpu_write(cpu_patching_context.mm, mm); this_cpu_write(cpu_patching_context.addr, addr); - this_cpu_write(cpu_patching_context.pte, pte); return 0; @@ -195,7 +194,6 @@ static int text_area_cpu_down_mm(unsigned int cpu) this_cpu_write(cpu_patching_context.mm, NULL); this_cpu_write(cpu_patching_context.addr, 0); - this_cpu_write(cpu_patching_context.pte, NULL); return 0; } @@ -289,12 +287,16 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) unsigned long pfn = get_patch_pfn(addr); struct mm_struct *patching_mm; struct mm_struct *orig_mm; + spinlock_t *ptl; patching_mm = __this_cpu_read(cpu_patching_context.mm); - pte = __this_cpu_read(cpu_patching_context.pte); text_poke_addr = __this_cpu_read(cpu_patching_context.addr); patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); /* order PTE update before use, also serves as the hwsync */ @@ -321,6 +323,8 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) */ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + pte_unmap_unlock(pte, ptl); + return err; } -- GitLab From db3568fd80a3999413c04ea0cf52596b7b0ad9aa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 16 Dec 2022 11:03:02 +0000 Subject: [PATCH 658/694] genirq/msi: Check for the presence of an irq domain when validating msi_ctrl For architectures such as s390 and powerpc that do not use irq domains for MSIs, dev->msi.domain is always NULL, so the per-device, per-bus MSI domain is also guaranteed to be NULL. So checking one without checking the other is bound to result in a splat, followed by a memory leak as we don't free the MSI descriptors. Add the missing check. 
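
With the added check, msi_ctrl_valid() only dereferences the per-device
MSI domain store when an irq domain is actually in use (sketch of the
resulting condition, assembled from the hunk below):

	if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
			 (dev->msi.domain &&
			  !dev->msi.data->__domains[ctrl->domid].domain)))
		return false;
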
Reported-by: Matthew Rosato Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/e570e70d-19bc-101b-0481-ff9a3cab3504@linux.ibm.com --- kernel/irq/msi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index bd4d4dd626b4b..e843604c3a4f6 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -165,7 +165,8 @@ static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) unsigned int hwsize; if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || - !dev->msi.data->__domains[ctrl->domid].domain)) + (dev->msi.domain && + !dev->msi.data->__domains[ctrl->domid].domain))) return false; hwsize = msi_domain_get_hwsize(dev, ctrl->domid); -- GitLab From e982ad82bd8f7931f5788a15dfa3709f7a7ee79f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 16 Dec 2022 11:08:52 +0000 Subject: [PATCH 659/694] genirq/msi: Return MSI_XA_DOMAIN_SIZE as the maximum MSI index when no domain is present On architectures such as s390 that do not use irq domains for MSI, returning 0 as the maximum MSI index is a bit counter-productive, as it indicates that no MSI can be allocated. Bad idea. Instead, return the maximum we're willing to support in the MSI backing store (MSI_XA_DOMAIN_SIZE), and let the arch code do its usual thing. Thanks to Matthew Rosato for fixing the fix. Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner [maz: commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/87fsdgzpqs.ffs@tglx --- kernel/irq/msi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index e843604c3a4f6..955267bbc2be6 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -610,8 +610,8 @@ static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid info = domain->host_data; return info->hwsize; } - /* No domain, no size... */ - return 0; + /* No domain, default to MSI_XA_DOMAIN_SIZE */ + return MSI_XA_DOMAIN_SIZE; } static inline void irq_chip_write_msi_msg(struct irq_data *data, -- GitLab From a7d550f82b445cf218b47a2c1a9c56e97ecb8c7a Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 12 Dec 2022 00:58:17 +0100 Subject: [PATCH 660/694] of: fdt: Honor CONFIG_CMDLINE* even without /chosen node I do not read a strict requirement on /chosen node in either ePAPR or in Documentation/devicetree. Help text for CONFIG_CMDLINE and CONFIG_CMDLINE_EXTEND doesn't make their behavior explicitly dependent on the presence of /chosen or the presense of /chosen/bootargs. However the early check for /chosen and bailing out in early_init_dt_scan_chosen() skips CONFIG_CMDLINE handling which is not really related to /chosen node or the particular method of passing cmdline from bootloader. This leads to counterintuitive combinations (assuming CONFIG_CMDLINE_EXTEND=y): a) bootargs="foo", CONFIG_CMDLINE="bar" => cmdline=="foo bar" b) /chosen missing, CONFIG_CMDLINE="bar" => cmdline=="" c) bootargs="", CONFIG_CMDLINE="bar" => cmdline==" bar" Move CONFIG_CMDLINE handling outside of early_init_dt_scan_chosen() so that cases b and c above result in the same cmdline. 
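
With this change, and assuming CONFIG_CMDLINE_EXTEND=y and
CONFIG_CMDLINE="bar" as above, the expected results become consistent:

a) bootargs="foo", CONFIG_CMDLINE="bar" => cmdline=="foo bar"
b) /chosen missing, CONFIG_CMDLINE="bar" => cmdline==" bar"
c) bootargs="", CONFIG_CMDLINE="bar" => cmdline==" bar"
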
Signed-off-by: Alexander Sverdlin Reviewed-by: Linus Walleij Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/11af73e05bad75e4ef49067515e3214f6d944b3d.camel@gmail.com Signed-off-by: Rob Herring --- drivers/of/fdt.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 7b571a6316397..b2272bccf85c9 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1173,26 +1173,6 @@ int __init early_init_dt_scan_chosen(char *cmdline) if (p != NULL && l > 0) strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE)); - /* - * CONFIG_CMDLINE is meant to be a default in case nothing else - * managed to set the command line, unless CONFIG_CMDLINE_FORCE - * is set in which case we override whatever was found earlier. - */ -#ifdef CONFIG_CMDLINE -#if defined(CONFIG_CMDLINE_EXTEND) - strlcat(cmdline, " ", COMMAND_LINE_SIZE); - strlcat(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#elif defined(CONFIG_CMDLINE_FORCE) - strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#else - /* No arguments from boot loader, use kernel's cmdl*/ - if (!((char *)cmdline)[0]) - strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif -#endif /* CONFIG_CMDLINE */ - - pr_debug("Command line is: %s\n", (char *)cmdline); - rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); if (rng_seed && l > 0) { add_bootloader_randomness(rng_seed, l); @@ -1297,6 +1277,26 @@ void __init early_init_dt_scan_nodes(void) if (rc) pr_warn("No chosen node found, continuing without\n"); + /* + * CONFIG_CMDLINE is meant to be a default in case nothing else + * managed to set the command line, unless CONFIG_CMDLINE_FORCE + * is set in which case we override whatever was found earlier. + */ +#ifdef CONFIG_CMDLINE +#if defined(CONFIG_CMDLINE_EXTEND) + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + strlcat(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#elif defined(CONFIG_CMDLINE_FORCE) + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#else + /* No arguments from boot loader, use kernel's cmdl */ + if (!boot_command_line[0]) + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#endif +#endif /* CONFIG_CMDLINE */ + + pr_debug("Command line is: %s\n", boot_command_line); + /* Setup memory, calling early_init_dt_add_memory_arch */ early_init_dt_scan_memory(); -- GitLab From d8a76e46d7d3d6db5458d7f46205153ca9a53afd Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 30 Nov 2022 16:41:11 +0100 Subject: [PATCH 661/694] dt-bindings: usb: tegra-xusb: Remove path references Unresolved path references are now flagged as errors when checking the device tree binding examples, so convert them into label references. 
Reported-by: Conor Dooley Suggested-by: Rob Herring Signed-off-by: Thierry Reding Acked-by: Rob Herring Tested-by: Geert Uytterhoeven Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221130154111.1655603-1-thierry.reding@gmail.com Signed-off-by: Rob Herring --- .../devicetree/bindings/usb/nvidia,tegra124-xusb.yaml | 4 +--- .../devicetree/bindings/usb/nvidia,tegra186-xusb.yaml | 4 +--- .../devicetree/bindings/usb/nvidia,tegra194-xusb.yaml | 8 ++------ .../devicetree/bindings/usb/nvidia,tegra210-xusb.yaml | 8 ++------ 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml index 4a6616bf9bab7..d6ca8c93073df 100644 --- a/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml +++ b/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml @@ -186,9 +186,7 @@ examples: nvidia,xusb-padctl = <&padctl>; - phys = <&{/padctl@0,7009f000/pads/usb2/lanes/usb2-1}>, /* mini-PCIe USB */ - <&{/padctl@0,7009f000/pads/usb2/lanes/usb2-2}>, /* USB A */ - <&{/padctl@0,7009f000/pads/pcie/lanes/pcie-0}>; /* USB A */ + phys = <&phy_usb2_1>, <&phy_usb2_2>, <&phy_pcie_0>; phy-names = "usb2-1", "usb2-2", "usb3-0"; avddio-pex-supply = <&vdd_1v05_run>; diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml index 6f62944fc5971..a04c6ce1e0f68 100644 --- a/Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml +++ b/Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml @@ -166,8 +166,6 @@ examples: #address-cells = <1>; #size-cells = <0>; - phys = <&{/padctl@3520000/pads/usb2/lanes/usb2-0}>, - <&{/padctl@3520000/pads/usb2/lanes/usb2-1}>, - <&{/padctl@3520000/pads/usb3/lanes/usb3-0}>; + phys = <&phy_usb2_0>, <&phy_usb2_1>, <&phy_usb3_0>; phy-names = "usb2-0", "usb2-1", "usb3-0"; }; diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml index 65ae9ae9b0b7a..b356793f73a15 100644 --- a/Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml +++ b/Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml @@ -169,11 +169,7 @@ examples: nvidia,xusb-padctl = <&xusb_padctl>; - phys = <&{/bus@0/padctl@3520000/pads/usb2/lanes/usb2-0}>, - <&{/bus@0/padctl@3520000/pads/usb2/lanes/usb2-1}>, - <&{/bus@0/padctl@3520000/pads/usb2/lanes/usb2-3}>, - <&{/bus@0/padctl@3520000/pads/usb3/lanes/usb3-0}>, - <&{/bus@0/padctl@3520000/pads/usb3/lanes/usb3-2}>, - <&{/bus@0/padctl@3520000/pads/usb3/lanes/usb3-3}>; + phys = <&phy_usb2_0>, <&phy_usb2_1>, <&phy_usb2_3>, <&phy_usb3_0>, + <&phy_usb3_2>, <&phy_usb3_3>; phy-names = "usb2-0", "usb2-1", "usb2-3", "usb3-0", "usb3-2", "usb3-3"; }; diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml index da1e1ec0e7c86..90296613b3a59 100644 --- a/Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml +++ b/Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml @@ -173,12 +173,8 @@ examples: nvidia,xusb-padctl = <&padctl>; - phys = <&{/padctl@7009f000/pads/usb2/lanes/usb2-0}>, - <&{/padctl@7009f000/pads/usb2/lanes/usb2-1}>, - <&{/padctl@7009f000/pads/usb2/lanes/usb2-2}>, - <&{/padctl@7009f000/pads/usb2/lanes/usb2-3}>, - <&{/padctl@7009f000/pads/pcie/lanes/pcie-6}>, - <&{/padctl@7009f000/pads/pcie/lanes/pcie-5}>; + phys = <&phy_usb2_0>, 
<&phy_usb2_1>, <&phy_usb2_2>, <&phy_usb2_3>, + <&phy_pcie_6>, <&phy_pcie_5>; phy-names = "usb2-0", "usb2-1", "usb2-2", "usb2-3", "usb3-0", "usb3-1"; dvddio-pex-supply = <&vdd_pex_1v05>; -- GitLab From 5fa9f7292b17bcd66da93226fc28fe1f755af154 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 2 Dec 2022 12:05:36 +0100 Subject: [PATCH 662/694] dt-bindings: vendor-prefixes: sort entries alphabetically Sort entries alphabetically. This was a semi manual job with help of: cat Documentation/devicetree/bindings/vendor-prefixes.yaml | grep '":' > old cat old | sort > new diff -ubB old new Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221202110536.22230-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- .../devicetree/bindings/vendor-prefixes.yaml | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 00493b962bd76..70ffb3780621b 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -198,10 +198,10 @@ patternProperties: description: Bosch Sensortec GmbH "^boundary,.*": description: Boundary Devices Inc. - "^broadmobi,.*": - description: Shanghai Broadmobi Communication Technology Co.,Ltd. "^brcm,.*": description: Broadcom Corporation + "^broadmobi,.*": + description: Shanghai Broadmobi Communication Technology Co.,Ltd. "^bsh,.*": description: BSH Hausgeraete GmbH "^bticino,.*": @@ -549,6 +549,8 @@ patternProperties: description: Hitex Development Tools "^holt,.*": description: Holt Integrated Circuits, Inc. + "^holtek,.*": + description: Holtek Semiconductor, Inc. "^honestar,.*": description: Honestar Technologies Co., Ltd. "^honeywell,.*": @@ -561,8 +563,6 @@ patternProperties: description: Hewlett Packard Enterprise "^hsg,.*": description: HannStar Display Co. - "^holtek,.*": - description: Holtek Semiconductor, Inc. "^huawei,.*": description: Huawei Technologies Co., Ltd. "^hugsun,.*": @@ -607,12 +607,10 @@ patternProperties: description: Infineon Technologies "^inforce,.*": description: Inforce Computing - "^ingrasys,.*": - description: Ingrasys Technology Inc. - "^ivo,.*": - description: InfoVision Optoelectronics Kunshan Co. Ltd. "^ingenic,.*": description: Ingenic Semiconductor + "^ingrasys,.*": + description: Ingrasys Technology Inc. "^injoinic,.*": description: Injoinic Technology Corp. "^innocomm,.*": @@ -649,6 +647,8 @@ patternProperties: description: ITEAD Intelligent Systems Co.Ltd "^itian,.*": description: ITian Corporation + "^ivo,.*": + description: InfoVision Optoelectronics Kunshan Co. Ltd. "^iwave,.*": description: iWave Systems Technologies Pvt. Ltd. "^jadard,.*": @@ -897,14 +897,14 @@ patternProperties: description: Shenzhen Netxeon Technology CO., LTD "^neweast,.*": description: Guangdong Neweast Optoelectronics CO., LTD + "^newhaven,.*": + description: Newhaven Display International "^newvision,.*": description: New Vision Display (Shenzhen) Co., Ltd. "^nexbox,.*": description: Nexbox "^nextthing,.*": description: Next Thing Co. - "^newhaven,.*": - description: Newhaven Display International "^ni,.*": description: National Instruments "^nintendo,.*": @@ -1053,10 +1053,10 @@ patternProperties: description: QEMU, a generic and open source machine emulator and virtualizer "^qi,.*": description: Qi Hardware - "^qihua,.*": - description: Chengdu Kaixuan Information Technology Co., Ltd. 
"^qiaodian,.*": description: QiaoDian XianShi Corporation + "^qihua,.*": + description: Chengdu Kaixuan Information Technology Co., Ltd. "^qishenglong,.*": description: Shenzhen QiShenglong Industrialist Co., Ltd. "^qnap,.*": @@ -1083,22 +1083,22 @@ patternProperties: description: reMarkable AS "^renesas,.*": description: Renesas Electronics Corporation - "^rex,.*": - description: iMX6 Rex Project "^rervision,.*": description: Shenzhen Rervision Technology Co., Ltd. "^revotics,.*": description: Revolution Robotics, Inc. (Revotics) + "^rex,.*": + description: iMX6 Rex Project "^richtek,.*": description: Richtek Technology Corporation "^ricoh,.*": description: Ricoh Co. Ltd. "^rikomagic,.*": description: Rikomagic Tech Corp. Ltd - "^riscv,.*": - description: RISC-V Foundation "^riot,.*": description: Embest RIoT + "^riscv,.*": + description: RISC-V Foundation "^rockchip,.*": description: Fuzhou Rockchip Electronics Co., Ltd "^rocktech,.*": @@ -1161,6 +1161,8 @@ patternProperties: description: Si-En Technology Ltd. "^si-linux,.*": description: Silicon Linux Corporation + "^siemens,.*": + description: Siemens AG "^sifive,.*": description: SiFive, Inc. "^sigma,.*": @@ -1183,8 +1185,8 @@ patternProperties: description: Siliconfile Technologies lnc. "^siliconmitus,.*": description: Silicon Mitus, Inc. - "^siemens,.*": - description: Siemens AG + "^silvaco,.*": + description: Silvaco, Inc. "^simtek,.*": description: Cypress Semiconductor Corporation (Simtek Corporation) "^sinlinx,.*": @@ -1270,8 +1272,6 @@ patternProperties: description: Sun Microsystems, Inc "^supermicro,.*": description: Super Micro Computer, Inc. - "^silvaco,.*": - description: Silvaco, Inc. "^swir,.*": description: Sierra Wireless "^syna,.*": @@ -1293,16 +1293,18 @@ patternProperties: description: Shenzhen City Tang Cheng Technology Co., Ltd. "^tdo,.*": description: Shangai Top Display Optoelectronics Co., Ltd + "^team-source-display,.*": + description: Shenzhen Team Source Display Technology Co., Ltd. (TSD) "^technexion,.*": description: TechNexion "^technologic,.*": description: Technologic Systems + "^techstar,.*": + description: Shenzhen Techstar Electronics Co., Ltd. "^teltonika,.*": description: Teltonika Networks "^tempo,.*": description: Tempo Semiconductor - "^techstar,.*": - description: Shenzhen Techstar Electronics Co., Ltd. "^terasic,.*": description: Terasic Inc. "^tesla,.*": @@ -1356,10 +1358,6 @@ patternProperties: description: Tronsmart "^truly,.*": description: Truly Semiconductors Limited - "^visionox,.*": - description: Visionox - "^team-source-display,.*": - description: Shenzhen Team Source Display Technology Co., Ltd. (TSD) "^tsd,.*": description: Theobroma Systems Design und Consulting GmbH "^tyan,.*": @@ -1368,10 +1366,10 @@ patternProperties: description: u-blox "^u-boot,.*": description: U-Boot bootloader - "^ucrobotics,.*": - description: uCRobotics "^ubnt,.*": description: Ubiquiti Networks + "^ucrobotics,.*": + description: uCRobotics "^udoo,.*": description: Udoo "^ugoos,.*": @@ -1410,6 +1408,8 @@ patternProperties: description: Used for virtual device without specific vendor. "^vishay,.*": description: Vishay Intertechnology, Inc + "^visionox,.*": + description: Visionox "^vitesse,.*": description: Vitesse Semiconductor Corporation "^vivante,.*": @@ -1424,6 +1424,8 @@ patternProperties: description: Vision Optical Technology Co., Ltd. "^vxt,.*": description: VXT Ltd + "^wanchanglong,.*": + description: Wanchanglong Electronics Technology(SHENZHEN)Co.,Ltd. 
"^wand,.*": description: Wandbord (Technexion) "^waveshare,.*": @@ -1464,8 +1466,6 @@ patternProperties: description: Wondermedia Technologies, Inc. "^wobo,.*": description: Wobo - "^wanchanglong,.*": - description: Wanchanglong Electronics Technology(SHENZHEN)Co.,Ltd. "^x-powers,.*": description: X-Powers "^xen,.*": @@ -1508,10 +1508,10 @@ patternProperties: description: Shenzhen Yashi Changhua Intelligent Technology Co., Ltd. "^ysoft,.*": description: Y Soft Corporation a.s. - "^zealz,.*": - description: Zealz "^zarlink,.*": description: Zarlink Semiconductor + "^zealz,.*": + description: Zealz "^zeitec,.*": description: ZEITEC Semiconductor Co., LTD. "^zidoo,.*": -- GitLab From f62678a77d585cf3f6302a5a4e2265b1a97b004b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 13 Dec 2022 14:20:06 +0100 Subject: [PATCH 663/694] dt-bindings: mxsfb: Document i.MX8M/i.MX6SX/i.MX6SL power-domains property The power-domains property is mandatory on i.MX8M Mini, Nano, Plus and i.MX6SX, i.MX6SL. Document the property and mark it as required on the aforementioned variants of the IP, present in those SoCs. Signed-off-by: Marek Vasut Reviewed-by: Krzysztof Kozlowski Reviewed-by: Liu Ying Link: https://lore.kernel.org/r/20221213132006.6446-1-marex@denx.de Signed-off-by: Rob Herring --- .../devicetree/bindings/display/fsl,lcdif.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml index f449cfc767899..75b4efd70ba85 100644 --- a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml +++ b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml @@ -99,7 +99,6 @@ allOf: maxItems: 3 required: - clock-names - - power-domains - if: not: properties: @@ -114,6 +113,19 @@ allOf: maxItems: 1 clock-names: maxItems: 1 + - if: + properties: + compatible: + contains: + enum: + - fsl,imx6sl-lcdif + - fsl,imx6sx-lcdif + - fsl,imx8mm-lcdif + - fsl,imx8mn-lcdif + - fsl,imx8mp-lcdif + then: + required: + - power-domains examples: - | @@ -128,6 +140,7 @@ examples: <&clks IMX6SX_CLK_LCDIF_APB>, <&clks IMX6SX_CLK_DISPLAY_AXI>; clock-names = "pix", "axi", "disp_axi"; + power-domains = <&pd_disp>; port { endpoint { -- GitLab From 435beb4110da372c313398891ca9eee87f83d3db Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 13 Dec 2022 11:06:26 +0100 Subject: [PATCH 664/694] dt-bindings: hwmon: ntc-thermistor: drop Naveen Krishna Chatradhi from maintainers Emails to Naveen Krishna Chatradhi bounce ("550 5.1.1 Recipient address rejected: User unknown"). 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221213100626.36150-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml b/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml index 3d3b139a91a2f..6a1920712fb9f 100644 --- a/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml +++ b/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml @@ -6,7 +6,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: NTC thermistor temperature sensors maintainers: - - Naveen Krishna Chatradhi - Linus Walleij description: | -- GitLab From 22c9f19002c7536b30bc15b6fb6276d62be2f758 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 11 Dec 2022 03:48:57 +0100 Subject: [PATCH 665/694] dt-bindings: imx6q-pcie: Handle various clock configurations The i.MX SoCs have various clock configurations routed into the PCIe IP, the list of clock is below. Document all those configurations in the DT binding document. All SoCs: pcie, pcie_bus 6QDL, 7D: + pcie_phy 6SX: + pcie_phy pcie_inbound_axi 8MQ: + pcie_phy pcie_aux 8MM, 8MP: + pcie_aux Reviewed-by: Rob Herring Acked-by: Alexander Stein Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20221211024859.672076-1-marex@denx.de Signed-off-by: Rob Herring --- .../bindings/pci/fsl,imx6q-pcie.yaml | 37 +++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml index 49b4f7a32e71e..5d74d914a830a 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml @@ -57,7 +57,7 @@ properties: items: - const: pcie - const: pcie_bus - - const: pcie_phy + - enum: [ pcie_phy, pcie_aux ] - enum: [ pcie_inbound_axi, pcie_aux ] num-lanes: @@ -185,7 +185,7 @@ allOf: items: - {} - {} - - {} + - const: pcie_phy - const: pcie_inbound_axi - if: properties: @@ -198,7 +198,7 @@ allOf: items: - {} - {} - - {} + - const: pcie_phy - const: pcie_aux - if: properties: @@ -208,10 +208,41 @@ allOf: enum: - fsl,imx6sx-pcie - fsl,imx8mq-pcie + then: + properties: + clocks: + maxItems: 3 + clock-names: + maxItems: 3 + + - if: + properties: + compatible: + contains: + enum: + - fsl,imx6q-pcie + - fsl,imx6qp-pcie + - fsl,imx7d-pcie + then: + properties: + clock-names: + maxItems: 3 + contains: + const: pcie_phy + + - if: + properties: + compatible: + contains: + enum: + - fsl,imx8mm-pcie + - fsl,imx8mp-pcie then: properties: clock-names: maxItems: 3 + contains: + const: pcie_aux unevaluatedProperties: false -- GitLab From 8b8161edf14acab716c01d03bafc8a3e8113a43a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 11 Dec 2022 03:48:58 +0100 Subject: [PATCH 666/694] dt-bindings: imx6q-pcie: Handle various PD configurations The i.MX SoCs have various power domain configurations routed into the PCIe IP. MX6SX is the only one which contains 2 domains and also uses power-domain-names. MX6QDL do not use any domains. All the rest uses one domain and does not use power-domain-names anymore. Document all those configurations in the DT binding document. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20221211024859.672076-2-marex@denx.de Signed-off-by: Rob Herring --- .../bindings/pci/fsl,imx6q-pcie.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml index 5d74d914a830a..f2e9404d33f8a 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml @@ -69,6 +69,7 @@ properties: required properties for imx7d-pcie and imx8mq-pcie. power-domains: + minItems: 1 items: - description: The phandle pointing to the DISPLAY domain for imx6sx-pcie, to PCIE_PHY power domain for imx7d-pcie and @@ -77,6 +78,7 @@ properties: for imx6sx-pcie. power-domain-names: + minItems: 1 items: - const: pcie - const: pcie_phy @@ -187,6 +189,10 @@ allOf: - {} - const: pcie_phy - const: pcie_inbound_axi + power-domains: + minItems: 2 + power-domain-names: + minItems: 2 - if: properties: compatible: @@ -243,6 +249,32 @@ allOf: maxItems: 3 contains: const: pcie_aux + - if: + properties: + compatible: + contains: + enum: + - fsl,imx6q-pcie + - fsl,imx6qp-pcie + then: + properties: + power-domains: false + power-domain-names: false + + - if: + not: + properties: + compatible: + contains: + enum: + - fsl,imx6sx-pcie + - fsl,imx6q-pcie + - fsl,imx6qp-pcie + then: + properties: + power-domains: + maxItems: 1 + power-domain-names: false unevaluatedProperties: false -- GitLab From 1a2cead15bcfac872c15457ef50ffbbe5ff641bc Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 11 Dec 2022 03:48:59 +0100 Subject: [PATCH 667/694] dt-bindings: imx6q-pcie: Handle more resets on legacy platforms The i.MX6 and i.MX7D does not use block controller to toggle PCIe reset, hence the PCIe DT description contains three reset entries on these older SoCs. Add this exception into the binding document. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20221211024859.672076-3-marex@denx.de Signed-off-by: Rob Herring --- .../bindings/pci/fsl,imx6q-pcie.yaml | 35 ++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml index f2e9404d33f8a..bad980902f66e 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml @@ -84,15 +84,14 @@ properties: - const: pcie_phy resets: + minItems: 2 maxItems: 3 description: Phandles to PCIe-related reset lines exposed by SRC IP block. Additional required by imx7d-pcie and imx8mq-pcie. reset-names: - items: - - const: pciephy - - const: apps - - const: turnoff + minItems: 2 + maxItems: 3 fsl,tx-deemph-gen1: description: Gen1 De-emphasis value (optional required). 
@@ -276,6 +275,34 @@ allOf: maxItems: 1 power-domain-names: false + - if: + properties: + compatible: + contains: + enum: + - fsl,imx6q-pcie + - fsl,imx6sx-pcie + - fsl,imx6qp-pcie + - fsl,imx7d-pcie + - fsl,imx8mq-pcie + then: + properties: + resets: + minItems: 3 + reset-names: + items: + - const: pciephy + - const: apps + - const: turnoff + else: + properties: + resets: + maxItems: 2 + reset-names: + items: + - const: apps + - const: turnoff + unevaluatedProperties: false examples: -- GitLab From e21a77d8dabe3c6384cdd485cb129c7d2a426708 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 7 Nov 2022 17:03:38 +0100 Subject: [PATCH 668/694] dt-bindings: watchdog: gpio: Convert bindings to YAML Convert the gpio-wdt bindings from text to YAML ones, to permit DT validation. Reviewed-by: Rob Herring Signed-off-by: Marek Vasut Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221107160338.27910-1-marex@denx.de [robh: add missing 'always-running'] Signed-off-by: Rob Herring --- .../devicetree/bindings/watchdog/gpio-wdt.txt | 28 ---------- .../bindings/watchdog/linux,wdt-gpio.yaml | 55 +++++++++++++++++++ 2 files changed, 55 insertions(+), 28 deletions(-) delete mode 100644 Documentation/devicetree/bindings/watchdog/gpio-wdt.txt create mode 100644 Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml diff --git a/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt b/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt deleted file mode 100644 index 198794963786b..0000000000000 --- a/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt +++ /dev/null @@ -1,28 +0,0 @@ -* GPIO-controlled Watchdog - -Required Properties: -- compatible: Should contain "linux,wdt-gpio". -- gpios: From common gpio binding; gpio connection to WDT reset pin. -- hw_algo: The algorithm used by the driver. Should be one of the - following values: - - toggle: Either a high-to-low or a low-to-high transition clears - the WDT counter. The watchdog timer is disabled when GPIO is - left floating or connected to a three-state buffer. - - level: Low or high level starts counting WDT timeout, - the opposite level disables the WDT. Active level is determined - by the GPIO flags. -- hw_margin_ms: Maximum time to reset watchdog circuit (milliseconds). - -Optional Properties: -- always-running: If the watchdog timer cannot be disabled, add this flag to - have the driver keep toggling the signal without a client. It will only cease - to toggle the signal when the device is open and the timeout elapsed. - -Example: - watchdog: watchdog { - /* ADM706 */ - compatible = "linux,wdt-gpio"; - gpios = <&gpio3 9 GPIO_ACTIVE_LOW>; - hw_algo = "toggle"; - hw_margin_ms = <1600>; - }; diff --git a/Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml b/Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml new file mode 100644 index 0000000000000..50af79af6416e --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/linux,wdt-gpio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GPIO-controlled Watchdog + +maintainers: + - Guenter Roeck + +properties: + compatible: + const: linux,wdt-gpio + + gpios: + description: gpio connection to WDT reset pin + maxItems: 1 + + hw_algo: + description: The algorithm used by the driver. 
+ enum: [ level, toggle ] + + hw_margin_ms: + description: Maximum time to reset watchdog circuit (milliseconds). + $ref: /schemas/types.yaml#/definitions/uint32 + + always-running: + type: boolean + description: + If the watchdog timer cannot be disabled, add this flag to have the driver + keep toggling the signal without a client. + It will only cease to toggle the signal when the device is open and the + timeout elapsed. + +required: + - compatible + - gpios + - hw_algo + - hw_margin_ms + +allOf: + - $ref: watchdog.yaml# + +additionalProperties: false + +examples: + - | + #include + watchdog { + compatible = "linux,wdt-gpio"; + gpios = <&gpio3 9 GPIO_ACTIVE_LOW>; + hw_algo = "toggle"; + hw_margin_ms = <1600>; + }; -- GitLab From ab040c42237f47dffbb1b83bd4e29f739b3a917e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Dec 2022 17:38:07 +0100 Subject: [PATCH 669/694] dt-bindings: drop redundant part of title of shared bindings The Devicetree bindings document does not have to say in the title that it is a "binding", but instead just describe the hardware. For shared (re-usable) schemas, name them all as "common properties". Signed-off-by: Krzysztof Kozlowski Acked-by: Guenter Roeck # watchdog Acked-by: Alexandre Belloni Acked-by: Jonathan Cameron # IIO Acked-by: Miquel Raynal Acked-by: Ulf Hansson # MMC Acked-by: Stephen Boyd # clk Acked-by: Vinod Koul # dma Acked-by: Mark Brown Acked-by: Hans Verkuil # media Acked-by: Sebastian Reichel # power Acked-by: Viresh Kumar # opp Link: https://lore.kernel.org/r/20221216163815.522628-2-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/clock/qcom,gcc.yaml | 2 +- Documentation/devicetree/bindings/dma/dma-common.yaml | 2 +- Documentation/devicetree/bindings/dma/dma-controller.yaml | 2 +- Documentation/devicetree/bindings/dma/dma-router.yaml | 2 +- Documentation/devicetree/bindings/iio/adc/adc.yaml | 2 +- Documentation/devicetree/bindings/input/input.yaml | 2 +- .../devicetree/bindings/media/video-interface-devices.yaml | 2 +- Documentation/devicetree/bindings/media/video-interfaces.yaml | 2 +- Documentation/devicetree/bindings/mmc/mmc-controller.yaml | 2 +- Documentation/devicetree/bindings/mtd/nand-chip.yaml | 2 +- Documentation/devicetree/bindings/mtd/nand-controller.yaml | 2 +- .../devicetree/bindings/net/bluetooth/bluetooth-controller.yaml | 2 +- Documentation/devicetree/bindings/net/can/can-controller.yaml | 2 +- Documentation/devicetree/bindings/net/ethernet-controller.yaml | 2 +- Documentation/devicetree/bindings/net/ethernet-phy.yaml | 2 +- Documentation/devicetree/bindings/net/mdio.yaml | 2 +- Documentation/devicetree/bindings/opp/opp-v2-base.yaml | 2 +- Documentation/devicetree/bindings/rtc/rtc.yaml | 2 +- .../devicetree/bindings/soundwire/soundwire-controller.yaml | 2 +- Documentation/devicetree/bindings/spi/spi-controller.yaml | 2 +- Documentation/devicetree/bindings/watchdog/watchdog.yaml | 2 +- 21 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml index 1ab416c83c8d8..7129fbcf2b6c5 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/qcom,gcc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Global Clock & Reset Controller Common Bindings +title: Qualcomm Global Clock & Reset Controller Common 
Properties maintainers: - Stephen Boyd diff --git a/Documentation/devicetree/bindings/dma/dma-common.yaml b/Documentation/devicetree/bindings/dma/dma-common.yaml index ad06d36af208e..ea700f8ee6c6a 100644 --- a/Documentation/devicetree/bindings/dma/dma-common.yaml +++ b/Documentation/devicetree/bindings/dma/dma-common.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/dma/dma-common.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: DMA Engine Generic Binding +title: DMA Engine Common Properties maintainers: - Vinod Koul diff --git a/Documentation/devicetree/bindings/dma/dma-controller.yaml b/Documentation/devicetree/bindings/dma/dma-controller.yaml index 6d3727267fa80..538ebadff6527 100644 --- a/Documentation/devicetree/bindings/dma/dma-controller.yaml +++ b/Documentation/devicetree/bindings/dma/dma-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/dma/dma-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: DMA Controller Generic Binding +title: DMA Controller Common Properties maintainers: - Vinod Koul diff --git a/Documentation/devicetree/bindings/dma/dma-router.yaml b/Documentation/devicetree/bindings/dma/dma-router.yaml index 4b817f5dc30e7..f8d8c3c88bcc9 100644 --- a/Documentation/devicetree/bindings/dma/dma-router.yaml +++ b/Documentation/devicetree/bindings/dma/dma-router.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/dma/dma-router.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: DMA Router Generic Binding +title: DMA Router Common Properties maintainers: - Vinod Koul diff --git a/Documentation/devicetree/bindings/iio/adc/adc.yaml b/Documentation/devicetree/bindings/iio/adc/adc.yaml index db348fcbb52cb..2616017297456 100644 --- a/Documentation/devicetree/bindings/iio/adc/adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/adc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Generic IIO bindings for ADC channels +title: IIO Common Properties for ADC Channels maintainers: - Jonathan Cameron diff --git a/Documentation/devicetree/bindings/input/input.yaml b/Documentation/devicetree/bindings/input/input.yaml index 17512f4347fdd..94f7942189e8c 100644 --- a/Documentation/devicetree/bindings/input/input.yaml +++ b/Documentation/devicetree/bindings/input/input.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/input.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common input schema binding +title: Input Devices Common Properties maintainers: - Dmitry Torokhov diff --git a/Documentation/devicetree/bindings/media/video-interface-devices.yaml b/Documentation/devicetree/bindings/media/video-interface-devices.yaml index 4527f56a5a6eb..cf7712ad297c0 100644 --- a/Documentation/devicetree/bindings/media/video-interface-devices.yaml +++ b/Documentation/devicetree/bindings/media/video-interface-devices.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/video-interface-devices.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common bindings for video receiver and transmitter devices +title: Common Properties for Video Receiver and Transmitter Devices maintainers: - Jacopo Mondi diff --git a/Documentation/devicetree/bindings/media/video-interfaces.yaml b/Documentation/devicetree/bindings/media/video-interfaces.yaml index 34bdad0281808..a211d49dc2ac3 100644 --- a/Documentation/devicetree/bindings/media/video-interfaces.yaml +++ 
b/Documentation/devicetree/bindings/media/video-interfaces.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/video-interfaces.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common bindings for video receiver and transmitter interface endpoints +title: Common Properties for Video Receiver and Transmitter Interface Endpoints maintainers: - Sakari Ailus diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml index e82c003680886..86c73fd825fdc 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml +++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/mmc-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MMC Controller Generic Binding +title: MMC Controller Common Properties maintainers: - Ulf Hansson diff --git a/Documentation/devicetree/bindings/mtd/nand-chip.yaml b/Documentation/devicetree/bindings/mtd/nand-chip.yaml index 6e2dc025d694d..33d079f76c05d 100644 --- a/Documentation/devicetree/bindings/mtd/nand-chip.yaml +++ b/Documentation/devicetree/bindings/mtd/nand-chip.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/nand-chip.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NAND Chip and NAND Controller Generic Binding +title: NAND Chip Common Properties maintainers: - Miquel Raynal diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml index 220aa2c8c0b50..efcd415f86416 100644 --- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml +++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/nand-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NAND Chip and NAND Controller Generic Binding +title: NAND Controller Common Properties maintainers: - Miquel Raynal diff --git a/Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml b/Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml index 9309dc40f54f0..59bb0d7e8ab33 100644 --- a/Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml +++ b/Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/bluetooth/bluetooth-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bluetooth Controller Generic Binding +title: Bluetooth Controller Common Properties maintainers: - Marcel Holtmann diff --git a/Documentation/devicetree/bindings/net/can/can-controller.yaml b/Documentation/devicetree/bindings/net/can/can-controller.yaml index 1f0e980510749..217be90960e8f 100644 --- a/Documentation/devicetree/bindings/net/can/can-controller.yaml +++ b/Documentation/devicetree/bindings/net/can/can-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/can/can-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: CAN Controller Generic Binding +title: CAN Controller Common Properties maintainers: - Marc Kleine-Budde diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 3aef506fa158e..00be387984acb 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -4,7 +4,7 @@ 
$id: http://devicetree.org/schemas/net/ethernet-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ethernet Controller Generic Binding +title: Ethernet Controller Common Properties maintainers: - David S. Miller diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index ad808e9ce5b9e..1327b81f15a21 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/ethernet-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ethernet PHY Generic Binding +title: Ethernet PHY Common Properties maintainers: - Andrew Lunn diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml index b5706d4e7e38f..a266ade918ca7 100644 --- a/Documentation/devicetree/bindings/net/mdio.yaml +++ b/Documentation/devicetree/bindings/net/mdio.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/mdio.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MDIO Bus Generic Binding +title: MDIO Bus Common Properties maintainers: - Andrew Lunn diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml index cf9c2f7bddc29..47e6f36b76370 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/opp/opp-v2-base.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Generic OPP (Operating Performance Points) Common Binding +title: Generic OPP (Operating Performance Points) Common Properties maintainers: - Viresh Kumar diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml index 0ec3551f12dd3..c6fff5486fe67 100644 --- a/Documentation/devicetree/bindings/rtc/rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rtc/rtc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: RTC Generic Binding +title: Real Time Clock Common Properties maintainers: - Alexandre Belloni diff --git a/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml b/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml index 4aad121eff3f2..fdeb8af417d7c 100644 --- a/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml +++ b/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/soundwire/soundwire-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: SoundWire Controller Generic Binding +title: SoundWire Controller Common Properties maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml index 01042a7f382e2..5a7c72cadf76b 100644 --- a/Documentation/devicetree/bindings/spi/spi-controller.yaml +++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/spi/spi-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: SPI Controller Generic Binding +title: SPI Controller Common Properties maintainers: - Mark Brown diff --git a/Documentation/devicetree/bindings/watchdog/watchdog.yaml 
b/Documentation/devicetree/bindings/watchdog/watchdog.yaml index e3dfb02f0ca52..fccae0d001103 100644 --- a/Documentation/devicetree/bindings/watchdog/watchdog.yaml +++ b/Documentation/devicetree/bindings/watchdog/watchdog.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/watchdog/watchdog.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Watchdog Generic Bindings +title: Watchdog Common Properties maintainers: - Guenter Roeck -- GitLab From 9d94e28505f8033adf98434573074b400c081902 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Dec 2022 17:38:08 +0100 Subject: [PATCH 670/694] dt-bindings: memory-controllers: ti,gpmc-child: drop redundant part of title The Devicetree bindings document does not have to say in the title that it is a "Devicetree binding", but instead just describe the hardware. Signed-off-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Acked-by: Ulf Hansson # MMC Link: https://lore.kernel.org/r/20221216163815.522628-3-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- .../devicetree/bindings/memory-controllers/ti,gpmc-child.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml index 4a257fac577ef..383d19e0ba26c 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/memory-controllers/ti,gpmc-child.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: device tree bindings for children of the Texas Instruments GPMC +title: Texas Instruments GPMC Bus Child Nodes maintainers: - Tony Lindgren -- GitLab From a0c2153dcfa0a7f58f189622c6c045ff5aafe08e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Dec 2022 17:38:09 +0100 Subject: [PATCH 671/694] dt-bindings: clock: st,stm32mp1-rcc: add proper title Add device name in the title, because "Reset Clock Controller" sounds too generic. Signed-off-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Acked-by: Ulf Hansson # MMC Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20221216163815.522628-4-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml index 242fe922b035e..5194be0b410e4 100644 --- a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml +++ b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/st,stm32mp1-rcc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Reset Clock Controller Binding +title: STMicroelectronics STM32MP1 Reset Clock Controller maintainers: - Gabriel Fernandez -- GitLab From a612130ca1a650b0ba3599fc3199143eb9e7060d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Dec 2022 17:38:10 +0100 Subject: [PATCH 672/694] dt-bindings: drop redundant part of title (end) The Devicetree bindings document does not have to say in the title that it is a "Devicetree binding", but instead just describe the hardware. 
Drop trailing "Devicetree bindings" in various forms (also with trailing full stop): find Documentation/devicetree/bindings/ -type f -name '*.yaml' \ -not -name 'trivial-devices.yaml' \ -exec sed -i -e 's/^title: \(.*\) [dD]evice[ -]\?[tT]ree [bB]indings\?\.\?$/title: \1/' {} \; find Documentation/devicetree/bindings/ -type f -name '*.yaml' \ -not -name 'trivial-devices.yaml' \ -exec sed -i -e 's/^title: \(.*\) [dD]evice[ -]\?[nN]ode [bB]indings\?\.\?$/title: \1/' {} \; find Documentation/devicetree/bindings/ -type f -name '*.yaml' \ -not -name 'trivial-devices.yaml' \ -exec sed -i -e 's/^title: \(.*\) [dD][tT] [bB]indings\?\.\?$/title: \1/' {} \; Signed-off-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Acked-by: Jonathan Cameron # IIO Reviewed-by: Jonathan Cameron Acked-by: Ulf Hansson # MMC Acked-by: Stephen Boyd # clk Acked-by: Dmitry Torokhov # input Acked-by: Mark Brown Acked-by: Hans Verkuil # media Acked-by: Sebastian Reichel # power Link: https://lore.kernel.org/r/20221216163815.522628-5-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml | 2 +- Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml | 2 +- .../devicetree/bindings/arm/firmware/linaro,optee-tz.yaml | 2 +- Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml | 2 +- Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml | 2 +- Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml | 2 +- Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml | 2 +- Documentation/devicetree/bindings/arm/mstar/mstar.yaml | 2 +- Documentation/devicetree/bindings/arm/npcm/npcm.yaml | 2 +- Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml | 2 +- Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml | 2 +- Documentation/devicetree/bindings/arm/socionext/uniphier.yaml | 2 +- Documentation/devicetree/bindings/arm/sprd/sprd.yaml | 2 +- Documentation/devicetree/bindings/arm/stm32/stm32.yaml | 2 +- .../bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml | 2 +- .../devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml | 2 +- .../bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml | 2 +- Documentation/devicetree/bindings/arm/ti/k3.yaml | 2 +- Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml | 2 +- Documentation/devicetree/bindings/clock/ingenic,cgu.yaml | 2 +- .../devicetree/bindings/clock/renesas,versaclock7.yaml | 2 +- Documentation/devicetree/bindings/display/bridge/anx6345.yaml | 2 +- .../devicetree/bindings/display/bridge/chrontel,ch7033.yaml | 2 +- .../devicetree/bindings/display/bridge/ite,it6505.yaml | 2 +- .../devicetree/bindings/display/bridge/ite,it66121.yaml | 2 +- Documentation/devicetree/bindings/display/bridge/ps8640.yaml | 2 +- Documentation/devicetree/bindings/display/ingenic,ipu.yaml | 2 +- Documentation/devicetree/bindings/display/ingenic,lcd.yaml | 2 +- 
.../devicetree/bindings/display/mediatek/mediatek,cec.yaml | 2 +- .../devicetree/bindings/display/mediatek/mediatek,dsi.yaml | 2 +- .../devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml | 2 +- .../devicetree/bindings/display/mediatek/mediatek,hdmi.yaml | 2 +- .../devicetree/bindings/display/panel/ilitek,ili9163.yaml | 2 +- Documentation/devicetree/bindings/display/panel/panel-lvds.yaml | 2 +- .../devicetree/bindings/display/panel/visionox,rm69299.yaml | 2 +- Documentation/devicetree/bindings/dma/ingenic,dma.yaml | 2 +- Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml | 2 +- Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml | 2 +- Documentation/devicetree/bindings/dma/ti/k3-udma.yaml | 2 +- .../devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml | 2 +- Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml | 2 +- Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml | 2 +- .../devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml | 2 +- Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml | 2 +- .../devicetree/bindings/input/pine64,pinephone-keyboard.yaml | 2 +- .../devicetree/bindings/input/touchscreen/chipone,icn8318.yaml | 2 +- .../devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml | 2 +- .../devicetree/bindings/input/touchscreen/silead,gsl1680.yaml | 2 +- .../devicetree/bindings/interrupt-controller/ingenic,intc.yaml | 2 +- .../bindings/interrupt-controller/realtek,rtl-intc.yaml | 2 +- .../devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml | 2 +- .../devicetree/bindings/media/i2c/dongwoon,dw9768.yaml | 2 +- Documentation/devicetree/bindings/media/i2c/ov8856.yaml | 2 +- Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml | 2 +- Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml | 2 +- Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml | 2 +- Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml | 2 +- Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml | 2 +- Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml | 2 +- .../bindings/memory-controllers/ingenic,nemc-peripherals.yaml | 2 +- .../devicetree/bindings/memory-controllers/ingenic,nemc.yaml | 2 +- .../devicetree/bindings/memory-controllers/ti,gpmc.yaml | 2 +- Documentation/devicetree/bindings/mips/ingenic/devices.yaml | 2 +- .../devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml | 2 +- Documentation/devicetree/bindings/mips/loongson/devices.yaml | 2 +- Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml | 2 +- Documentation/devicetree/bindings/mtd/ingenic,nand.yaml | 2 +- .../devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml | 2 +- Documentation/devicetree/bindings/net/can/bosch,c_can.yaml | 2 +- Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml | 2 +- Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml | 2 +- Documentation/devicetree/bindings/net/dsa/dsa-port.yaml | 2 +- Documentation/devicetree/bindings/net/dsa/dsa.yaml | 2 +- .../devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml | 2 +- Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml | 2 +- Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml | 2 +- Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml | 2 +- .../devicetree/bindings/net/wireless/microchip,wilc1000.yaml | 2 +- Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml | 2 +- Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml | 2 +- Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml | 2 +- 
Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml | 2 +- .../devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml | 2 +- .../devicetree/bindings/power/supply/maxim,ds2760.yaml | 2 +- .../devicetree/bindings/power/supply/maxim,max14656.yaml | 2 +- Documentation/devicetree/bindings/rtc/epson,rx8900.yaml | 2 +- Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml | 2 +- Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml | 2 +- Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml | 2 +- Documentation/devicetree/bindings/serial/ingenic,uart.yaml | 2 +- Documentation/devicetree/bindings/serial/serial.yaml | 2 +- Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml | 2 +- Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml | 2 +- Documentation/devicetree/bindings/sound/ingenic,aic.yaml | 2 +- Documentation/devicetree/bindings/sound/ingenic,codec.yaml | 2 +- .../devicetree/bindings/sound/qcom,lpass-rx-macro.yaml | 2 +- .../devicetree/bindings/sound/qcom,lpass-tx-macro.yaml | 2 +- .../devicetree/bindings/sound/qcom,lpass-va-macro.yaml | 2 +- .../devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml | 2 +- Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml | 2 +- Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml | 2 +- Documentation/devicetree/bindings/sound/ti,src4xxx.yaml | 2 +- Documentation/devicetree/bindings/spi/ingenic,spi.yaml | 2 +- Documentation/devicetree/bindings/spi/spi-gpio.yaml | 2 +- Documentation/devicetree/bindings/timer/ingenic,tcu.yaml | 2 +- Documentation/devicetree/bindings/usb/ingenic,musb.yaml | 2 +- Documentation/devicetree/bindings/usb/maxim,max33359.yaml | 2 +- Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml | 2 +- Documentation/devicetree/bindings/usb/ti,tps6598x.yaml | 2 +- Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml | 2 +- 120 files changed, 120 insertions(+), 120 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml index 8051a75c2c79a..162a39dab2182 100644 --- a/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/bcm2835.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom BCM2711/BCM2835 Platforms Device Tree Bindings +title: Broadcom BCM2711/BCM2835 Platforms maintainers: - Eric Anholt diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml index c603243574357..f2bcac0096b73 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,bcm11351.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom BCM11351 device tree bindings +title: Broadcom BCM11351 maintainers: - Florian Fainelli diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml index b3020757380f6..cf4e254e32f19 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,bcm21664.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom BCM21664 device tree bindings +title: Broadcom BCM21664 
maintainers: - Florian Fainelli diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml index 37f3a6fcde760..eafec29ba7abd 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,bcm23550.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom BCM23550 device tree bindings +title: Broadcom BCM23550 maintainers: - Florian Fainelli diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml index 52b575c405998..454b0e93245d8 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,bcm4708.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom BCM4708 device tree bindings +title: Broadcom BCM4708 description: Broadcom BCM4708/47081/4709/47094/53012 Wi-Fi/network SoCs based diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml index 84866e29cab0e..07892cbdd23c5 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,bcmbca.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom Broadband SoC device tree bindings +title: Broadcom Broadband SoC description: Broadcom Broadband SoCs include family of high performance DSL/PON/Wireless diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml index 432ccf990f9e8..a0a3f32db54ea 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,cygnus.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom Cygnus device tree bindings +title: Broadcom Cygnus maintainers: - Ray Jui diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml index 294948399f825..cc6add0e933a0 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,hr2.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom Hurricane 2 device tree bindings +title: Broadcom Hurricane 2 description: Broadcom Hurricane 2 family of SoCs are used for switching control. 
These SoCs diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml index c4847abbecd8e..6696598eca0e9 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,ns2.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom North Star 2 (NS2) device tree bindings +title: Broadcom North Star 2 (NS2) maintainers: - Ray Jui diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml index 7d184ba7d180b..a43b2d4d936b3 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,nsp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom Northstar Plus device tree bindings +title: Broadcom Northstar Plus description: Broadcom Northstar Plus family of SoCs are used for switching control diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml index c638e04ebae0f..c6ccb78aab0a5 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,stingray.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom Stingray device tree bindings +title: Broadcom Stingray maintainers: - Ray Jui diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml index 4eba182abd535..3f441352fbf06 100644 --- a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/bcm/brcm,vulcan-soc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom Vulcan device tree bindings +title: Broadcom Vulcan maintainers: - Robert Richter diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml index 9a426110a14a9..d4dc0749f9fd1 100644 --- a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml +++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/firmware/linaro,optee-tz.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: OP-TEE Device Tree Bindings +title: OP-TEE maintainers: - Jens Wiklander diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml index b38458022946f..540876322040a 100644 --- a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml +++ b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/hisilicon/hisilicon.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Hisilicon Platforms Device Tree Bindings +title: Hisilicon Platforms maintainers: - Wei Xu diff --git a/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml b/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml index 34f5f877d4441..91b96065f7dfc 100644 --- 
a/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml +++ b/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/keystone/ti,sci.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: TI-SCI controller device node bindings +title: TI-SCI controller maintainers: - Nishanth Menon diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml index e9bf3054529f1..52d78521e4124 100644 --- a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml +++ b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/marvell/armada-7k-8k.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell Armada 7K/8K Platforms Device Tree Bindings +title: Marvell Armada 7K/8K Platforms maintainers: - Gregory CLEMENT diff --git a/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml b/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml index d581161361547..4c43eaf3632e4 100644 --- a/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml +++ b/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/mrvl/mrvl.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell Platforms Device Tree Bindings +title: Marvell Platforms maintainers: - Lubomir Rintel diff --git a/Documentation/devicetree/bindings/arm/mstar/mstar.yaml b/Documentation/devicetree/bindings/arm/mstar/mstar.yaml index 8892eb6bd3ef7..937059fcc7b3b 100644 --- a/Documentation/devicetree/bindings/arm/mstar/mstar.yaml +++ b/Documentation/devicetree/bindings/arm/mstar/mstar.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/mstar/mstar.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MStar platforms device tree bindings +title: MStar platforms maintainers: - Daniel Palmer diff --git a/Documentation/devicetree/bindings/arm/npcm/npcm.yaml b/Documentation/devicetree/bindings/arm/npcm/npcm.yaml index 43409e5721d59..6871483947c51 100644 --- a/Documentation/devicetree/bindings/arm/npcm/npcm.yaml +++ b/Documentation/devicetree/bindings/arm/npcm/npcm.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/npcm/npcm.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NPCM Platforms Device Tree Bindings +title: NPCM Platforms maintainers: - Jonathan Neuschäfer diff --git a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml index 214c97bc30634..f1bd6f50e726d 100644 --- a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml +++ b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/nxp/lpc32xx.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP LPC32xx Platforms Device Tree Bindings +title: NXP LPC32xx Platforms maintainers: - Roland Stigge diff --git a/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml b/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml index aa1d4afbc5105..5a428a885760c 100644 --- a/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml +++ b/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/socionext/milbeaut.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Milbeaut platforms device tree bindings +title: Milbeaut platforms maintainers: 
- Taichi Sugaya diff --git a/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml b/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml index c2cea1c90f3cc..3e7f3d927ec7b 100644 --- a/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml +++ b/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/socionext/uniphier.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Socionext UniPhier platform device tree bindings +title: Socionext UniPhier platform maintainers: - Masahiro Yamada diff --git a/Documentation/devicetree/bindings/arm/sprd/sprd.yaml b/Documentation/devicetree/bindings/arm/sprd/sprd.yaml index 2c12e571394b8..eaa67b8e0d6c7 100644 --- a/Documentation/devicetree/bindings/arm/sprd/sprd.yaml +++ b/Documentation/devicetree/bindings/arm/sprd/sprd.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/arm/sprd/sprd.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Unisoc platforms device tree bindings +title: Unisoc platforms maintainers: - Orson Zhai diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml index 44f5c5855af8c..13e34241145b4 100644 --- a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml +++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/stm32/stm32.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Platforms Device Tree Bindings +title: STMicroelectronics STM32 Platforms maintainers: - Alexandre Torgue diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml index f3878e0b3cc48..d805c4508b4ea 100644 --- a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml +++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Allwinner CPU Configuration Controller Device Tree Bindings +title: Allwinner CPU Configuration Controller maintainers: - Chen-Yu Tsai diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml index 668aadbfe4c0b..644f391afb321 100644 --- a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml +++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/sunxi/allwinner,sun9i-a80-prcm.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Allwinner A80 PRCM Device Tree Bindings +title: Allwinner A80 PRCM maintainers: - Chen-Yu Tsai diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml index 869c266e7ebc9..6089a96eae4ff 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra-ccplex-cluster.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: NVIDIA Tegra CPU COMPLEX CLUSTER area device tree bindings +title: 
NVIDIA Tegra CPU COMPLEX CLUSTER area maintainers: - Sumit Gupta diff --git a/Documentation/devicetree/bindings/arm/ti/k3.yaml b/Documentation/devicetree/bindings/arm/ti/k3.yaml index 09e6845ff2439..203faab80142b 100644 --- a/Documentation/devicetree/bindings/arm/ti/k3.yaml +++ b/Documentation/devicetree/bindings/arm/ti/k3.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/ti/k3.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments K3 Multicore SoC architecture device tree bindings +title: Texas Instruments K3 Multicore SoC architecture maintainers: - Nishanth Menon diff --git a/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml b/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml index c022d325fc08b..1656d1a4476f6 100644 --- a/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml +++ b/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/ti/ti,davinci.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments DaVinci Platforms Device Tree Bindings +title: Texas Instruments DaVinci Platforms maintainers: - Sekhar Nori diff --git a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml index df256ebcd3663..9e733b10c392f 100644 --- a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml +++ b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/ingenic,cgu.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs CGU devicetree bindings +title: Ingenic SoCs CGU description: | The CGU in an Ingenic SoC provides all the clocks generated on-chip. It diff --git a/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml b/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml index 8d4eb4475fc8b..b339f1f9f072c 100644 --- a/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml +++ b/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/renesas,versaclock7.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Renesas Versaclock7 Programmable Clock Device Tree Bindings +title: Renesas Versaclock7 Programmable Clock maintainers: - Alex Helms diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml index 1c0406c38fe54..9bf2cbcea69fd 100644 --- a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml +++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/bridge/anx6345.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Analogix ANX6345 eDP Transmitter Device Tree Bindings +title: Analogix ANX6345 eDP Transmitter maintainers: - Torsten Duwe diff --git a/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml b/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml index bb6289c7d375e..b0589fa16736a 100644 --- a/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml +++ b/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/display/bridge/chrontel,ch7033.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Chrontel CH7033 Video Encoder Device Tree Bindings +title: Chrontel CH7033 Video Encoder maintainers: 
- Lubomir Rintel diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml index 833d11b2303a7..b697c42399ea3 100644 --- a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/bridge/ite,it6505.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ITE it6505 Device Tree Bindings +title: ITE it6505 maintainers: - Allen Chen diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml index 1b2185be92cdd..d3454da1247a0 100644 --- a/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/bridge/ite,it66121.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ITE it66121 HDMI bridge Device Tree Bindings +title: ITE it66121 HDMI bridge maintainers: - Phong LE diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml index 8ab156e0a8cfd..28811aff2c5ad 100644 --- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/bridge/ps8640.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MIPI DSI to eDP Video Format Converter Device Tree Bindings +title: MIPI DSI to eDP Video Format Converter maintainers: - Nicolas Boichat diff --git a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml index 3f93def2c5a20..319bd7c88fe3c 100644 --- a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml +++ b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/ingenic,ipu.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs Image Processing Unit (IPU) devicetree bindings +title: Ingenic SoCs Image Processing Unit (IPU) maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml index c0bb02fb49f4b..6d4c00f3fcc88 100644 --- a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml +++ b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/ingenic,lcd.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs LCD controller devicetree bindings +title: Ingenic SoCs LCD controller maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml index 66288b9f0aa6e..080cf321209ea 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/mediatek/mediatek,cec.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Mediatek HDMI CEC Controller Device Tree Bindings +title: Mediatek HDMI CEC Controller maintainers: - CK Hu diff --git 
a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml index b18d6a57c6e1b..4707b60238b07 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/mediatek/mediatek,dsi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MediaTek DSI Controller Device Tree Bindings +title: MediaTek DSI Controller maintainers: - Chun-Kuang Hu diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml index b6fcdfb99ab2c..bd8f7b8ae0ff2 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/mediatek/mediatek,hdmi-ddc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Mediatek HDMI DDC Device Tree Bindings +title: Mediatek HDMI DDC maintainers: - CK Hu diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml index bdaf0b51e68cd..8afdd67d67809 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/mediatek/mediatek,hdmi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Mediatek HDMI Encoder Device Tree Bindings +title: Mediatek HDMI Encoder maintainers: - CK Hu diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml index a4154b51043e1..90e323e19edb7 100644 --- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml +++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/panel/ilitek,ili9163.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ilitek ILI9163 display panels device tree bindings +title: Ilitek ILI9163 display panels maintainers: - Daniel Mack diff --git a/Documentation/devicetree/bindings/display/panel/panel-lvds.yaml b/Documentation/devicetree/bindings/display/panel/panel-lvds.yaml index fcc50db6a812c..c77ee034310a3 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-lvds.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-lvds.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/panel/panel-lvds.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Generic LVDS Display Panel Device Tree Bindings +title: Generic LVDS Display Panel maintainers: - Lad Prabhakar diff --git a/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml b/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml index 076b057b4af54..481ef051df1e2 100644 --- a/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml +++ b/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/panel/visionox,rm69299.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Visionox model RM69299 Panels Device Tree Bindings. 
+title: Visionox model RM69299 Panels maintainers: - Harigovindan P diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml index 3b0b3b919af8c..2be6ed1bc8133 100644 --- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml +++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/dma/ingenic,dma.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs DMA Controller DT bindings +title: Ingenic SoCs DMA Controller maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml index 08627d91e607d..a702d2c2ff8d2 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml @@ -6,7 +6,7 @@ $id: http://devicetree.org/schemas/dma/ti/k3-bcdma.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments K3 DMSS BCDMA Device Tree Bindings +title: Texas Instruments K3 DMSS BCDMA maintainers: - Peter Ujfalusi diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml index 507d16d84adeb..a69f62f854d8c 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml @@ -6,7 +6,7 @@ $id: http://devicetree.org/schemas/dma/ti/k3-pktdma.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments K3 DMSS PKTDMA Device Tree Bindings +title: Texas Instruments K3 DMSS PKTDMA maintainers: - Peter Ujfalusi diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml index 6a09bbf83d462..7ff428ad3aaed 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml @@ -6,7 +6,7 @@ $id: http://devicetree.org/schemas/dma/ti/k3-udma.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments K3 NAVSS Unified DMA Device Tree Bindings +title: Texas Instruments K3 NAVSS Unified DMA maintainers: - Peter Ujfalusi diff --git a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml index 2a595b18ff6c7..825294e3f0e83 100644 --- a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml +++ b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/dma/xilinx/xlnx,zynqmp-dpdma.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Xilinx ZynqMP DisplayPort DMA Controller Device Tree Bindings +title: Xilinx ZynqMP DisplayPort DMA Controller description: | These bindings describe the DMA engine included in the Xilinx ZynqMP diff --git a/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml b/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml index 64cfd971c9c5e..144e86ce5c0a3 100644 --- a/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml +++ b/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/eeprom/microchip,93lc46b.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Microchip 93xx46 SPI compatible EEPROM family dt bindings +title: Microchip 93xx46 SPI compatible EEPROM family maintainers: - Cory Tusar 
diff --git a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml index af6d64a6da6e6..b61fdc9548d87 100644 --- a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/i2c/ingenic,i2c.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs I2C controller devicetree bindings +title: Ingenic SoCs I2C controller maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml b/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml index 15c514b83583b..a73a355fc6652 100644 --- a/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml +++ b/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/allwinner,sun8i-a33-ths.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Allwinner A33 Thermal Sensor Device Tree Bindings +title: Allwinner A33 Thermal Sensor maintainers: - Chen-Yu Tsai diff --git a/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml b/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml index 57a31356082ef..720c16a108d4e 100644 --- a/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/ti,palmas-gpadc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Palmas general purpose ADC IP block devicetree bindings +title: Palmas general purpose ADC IP block maintainers: - Tony Lindgren diff --git a/Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml b/Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml index e4a0ac0fff9a7..490f6c3d9e4be 100644 --- a/Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml +++ b/Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/pine64,pinephone-keyboard.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Pine64 PinePhone keyboard device tree bindings +title: Pine64 PinePhone keyboard maintainers: - Samuel Holland diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml index 9df685bdc5db1..74a8a01e07450 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/chipone,icn8318.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ChipOne ICN8318 Touchscreen Controller Device Tree Bindings +title: ChipOne ICN8318 Touchscreen Controller maintainers: - Dmitry Torokhov diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml index f9998edbff702..3305eda5ed88d 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/pixcir,pixcir_ts.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Pixcir 
Touchscreen Controller Device Tree Bindings +title: Pixcir Touchscreen Controller maintainers: - Dmitry Torokhov diff --git a/Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml b/Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml index eec6f7f6f0a3a..95b554be25b40 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/silead,gsl1680.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Silead GSL1680 Touchscreen Controller Device Tree Bindings +title: Silead GSL1680 Touchscreen Controller maintainers: - Dmitry Torokhov diff --git a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml index 0358a7739c8e2..609308a5f91d8 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/interrupt-controller/ingenic,intc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs interrupt controller devicetree bindings +title: Ingenic SoCs interrupt controller maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml index 13a893b18fb64..fb5593724059d 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/interrupt-controller/realtek,rtl-intc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Realtek RTL SoC interrupt controller devicetree bindings +title: Realtek RTL SoC interrupt controller description: Interrupt controller and router for Realtek MIPS SoCs, allowing each SoC diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml index 6bda4f2b94c2e..a61a76bb611cd 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/allwinner,sun6i-a31-isp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Allwinner A31 Image Signal Processor Driver (ISP) Device Tree Bindings +title: Allwinner A31 Image Signal Processor Driver (ISP) maintainers: - Paul Kocialkowski diff --git a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml index 21864ab86ec46..82d3d18c16a18 100644 --- a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml +++ b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/media/i2c/dongwoon,dw9768.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Dongwoon Anatech DW9768 Voice Coil Motor (VCM) Lens Device Tree Bindings +title: Dongwoon Anatech DW9768 Voice Coil Motor (VCM) Lens maintainers: - Dongchun Zhu diff --git a/Documentation/devicetree/bindings/media/i2c/ov8856.yaml b/Documentation/devicetree/bindings/media/i2c/ov8856.yaml index 
baf92aaaf0498..e17288d579812 100644 --- a/Documentation/devicetree/bindings/media/i2c/ov8856.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ov8856.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/media/i2c/ov8856.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Omnivision OV8856 CMOS Sensor Device Tree Bindings +title: Omnivision OV8856 CMOS Sensor maintainers: - Dongchun Zhu diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml index 63a040944f3dc..54df9d73dc86f 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/media/i2c/ovti,ov02a10.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Omnivision OV02A10 CMOS Sensor Device Tree Bindings +title: Omnivision OV02A10 CMOS Sensor maintainers: - Dongchun Zhu diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml index 540fd69ac39f1..a621032f9bd07 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/i2c/ovti,ov5640.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: OmniVision OV5640 Image Sensor Device Tree Bindings +title: OmniVision OV5640 Image Sensor maintainers: - Steve Longerbeam diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml index 52c6281a66847..bc9b27afe3ea3 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/i2c/ovti,ov5645.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: OmniVision OV5645 Image Sensor Device Tree Bindings +title: OmniVision OV5645 Image Sensor maintainers: - Lad Prabhakar diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml index 246dc5fec716c..61e4e9cf87832 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/i2c/ovti,ov5648.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: OmniVision OV5648 Image Sensor Device Tree Bindings +title: OmniVision OV5648 Image Sensor maintainers: - Paul Kocialkowski diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml index b962863e4f657..6bac326dceafd 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/i2c/ovti,ov8865.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: OmniVision OV8865 Image Sensor Device Tree Bindings +title: OmniVision OV8865 Image Sensor maintainers: - Paul Kocialkowski diff --git a/Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml b/Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml index 6597e1d0e65fd..8c28848b226a0 100644 --- a/Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml 
+++ b/Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/media/i2c/st,st-vgxy61.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics VGxy61 HDR Global Shutter Sensor Family Device Tree Bindings +title: STMicroelectronics VGxy61 HDR Global Shutter Sensor Family maintainers: - Benjamin Mugnier diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml index b8ed52a44d571..89ebe3979012c 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/memory-controllers/ingenic,nemc-peripherals.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs NAND / External Memory Controller (NEMC) devicetree bindings +title: Ingenic SoCs NAND / External Memory Controller (NEMC) maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml index dd13a5106d6c5..a02724221ff3c 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/memory-controllers/ingenic,nemc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs NAND / External Memory Controller (NEMC) devicetree bindings +title: Ingenic SoCs NAND / External Memory Controller (NEMC) maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml index e188a4bf755c0..4f30173ad7479 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/memory-controllers/ti,gpmc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments GPMC Memory Controller device-tree bindings +title: Texas Instruments GPMC Memory Controller maintainers: - Tony Lindgren diff --git a/Documentation/devicetree/bindings/mips/ingenic/devices.yaml b/Documentation/devicetree/bindings/mips/ingenic/devices.yaml index ee00d414df100..f2e822afe7fbc 100644 --- a/Documentation/devicetree/bindings/mips/ingenic/devices.yaml +++ b/Documentation/devicetree/bindings/mips/ingenic/devices.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mips/ingenic/devices.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic XBurst based Platforms Device Tree Bindings +title: Ingenic XBurst based Platforms maintainers: - 周琰杰 (Zhou Yanjie) diff --git a/Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml b/Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml index 40130fefa2b41..15d41bdbdc263 100644 --- a/Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml +++ b/Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mips/lantiq/lantiq,dma-xway.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Lantiq Xway SoCs DMA Controller DT bindings +title: Lantiq Xway SoCs DMA Controller 
maintainers: - John Crispin diff --git a/Documentation/devicetree/bindings/mips/loongson/devices.yaml b/Documentation/devicetree/bindings/mips/loongson/devices.yaml index 9fee6708e6f59..f13ce386f42cc 100644 --- a/Documentation/devicetree/bindings/mips/loongson/devices.yaml +++ b/Documentation/devicetree/bindings/mips/loongson/devices.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mips/loongson/devices.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Loongson based Platforms Device Tree Bindings +title: Loongson based Platforms maintainers: - Jiaxun Yang diff --git a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml index 2d10aedf2e008..bb4e0be0c8935 100644 --- a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml +++ b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/ingenic,mmc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs MMC Controller DT bindings +title: Ingenic SoCs MMC Controller maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml b/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml index a811a512ecc50..a7bdb5d3675cf 100644 --- a/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml +++ b/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/ingenic,nand.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs NAND controller devicetree bindings +title: Ingenic SoCs NAND controller maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml index 3c51b2d02957d..9c494957a07aa 100644 --- a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml +++ b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/can/allwinner,sun4i-a10-can.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Allwinner A10 CAN Controller Device Tree Bindings +title: Allwinner A10 CAN Controller maintainers: - Chen-Yu Tsai diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml index 51aa89ac7e850..4d7d67ee175a3 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/can/bosch,c_can.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bosch C_CAN/D_CAN controller Device Tree Bindings +title: Bosch C_CAN/D_CAN controller description: Bosch C_CAN/D_CAN controller for CAN bus diff --git a/Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml b/Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml index 4635cb96fc64c..a009a44029385 100644 --- a/Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml +++ b/Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/can/ctu,ctucanfd.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: CTU CAN FD Open-source IP Core Device Tree Bindings +title: CTU CAN FD Open-source IP Core description: | Open-source CAN FD IP core developed at the Czech Technical University in Prague diff --git 
a/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml b/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml index 259a0c6547f30..2a6d126606ca9 100644 --- a/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml +++ b/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/dsa/arrow,xrs700x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Arrow SpeedChips XRS7000 Series Switch Device Tree Bindings +title: Arrow SpeedChips XRS7000 Series Switch allOf: - $ref: dsa.yaml# diff --git a/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml b/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml index 9abb8eba5fadf..b173fceb89983 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/dsa/dsa-port.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ethernet Switch port Device Tree Bindings +title: Ethernet Switch port maintainers: - Andrew Lunn diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index b9d48e357e772..5469ae8a43893 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/dsa/dsa.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ethernet Switch Device Tree Bindings +title: Ethernet Switch maintainers: - Andrew Lunn diff --git a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml index 1d7dab31457d5..447589b01e8e9 100644 --- a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml +++ b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/dsa/hirschmann,hellcreek.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Hirschmann Hellcreek TSN Switch Device Tree Bindings +title: Hirschmann Hellcreek TSN Switch allOf: - $ref: dsa.yaml# diff --git a/Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml b/Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml index 8d93ed9c172cb..347a0e1b3d3ff 100644 --- a/Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml +++ b/Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/dsa/mscc,ocelot.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Microchip Ocelot Switch Family Device Tree Bindings +title: Microchip Ocelot Switch Family maintainers: - Vladimir Oltean diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml index 1e26d876d1463..df98a16e4e753 100644 --- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/dsa/nxp,sja1105.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP SJA1105 Automotive Ethernet Switch Family Device Tree Bindings +title: NXP SJA1105 Automotive Ethernet Switch Family description: The SJA1105 SPI interface requires a CS-to-CLK time (t2 in UM10944.pdf) of at diff --git a/Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml 
b/Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml index 284ef45add998..5557676e9d4bc 100644 --- a/Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml +++ b/Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/wireless/esp,esp8089.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Espressif ESP8089 Device Tree Bindings +title: Espressif ESP8089 maintainers: - Hans de Goede diff --git a/Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml b/Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml index b3405f284580a..2460ccc082371 100644 --- a/Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml +++ b/Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/wireless/microchip,wilc1000.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Microchip WILC wireless devicetree bindings +title: Microchip WILC wireless maintainers: - Adham Abozaeid diff --git a/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml b/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml index b35d2f3ad1adc..583db5d42226d 100644 --- a/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml +++ b/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml @@ -6,7 +6,7 @@ $id: http://devicetree.org/schemas/net/wireless/silabs,wfx.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Silicon Labs WFxxx devicetree bindings +title: Silicon Labs WFxxx maintainers: - Jérôme Pouiller diff --git a/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml b/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml index ee79e13b5fe0d..e08504ef3b6e1 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/nvmem/qcom,spmi-sdam.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies, Inc. SPMI SDAM DT bindings +title: Qualcomm Technologies, Inc. 
SPMI SDAM maintainers: - Shyam Kumar Thella diff --git a/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml b/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml index 5cab216486321..30b42008db063 100644 --- a/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml +++ b/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/ingenic,phy-usb.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs USB PHY devicetree bindings +title: Ingenic SoCs USB PHY maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml index c2c370448b817..a4397930e0e80 100644 --- a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/pinctrl/ingenic,pinctrl.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs pin controller devicetree bindings +title: Ingenic SoCs pin controller description: > Please refer to pinctrl-bindings.txt in this directory for details of the diff --git a/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml b/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml index 46de35861738e..11f1f98c1cdc1 100644 --- a/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml +++ b/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/reset/xlnx,zynqmp-power.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Xilinx Zynq MPSoC Power Management Device Tree Bindings +title: Xilinx Zynq MPSoC Power Management maintainers: - Michal Simek diff --git a/Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml b/Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml index c838efcf7e160..5faa2418fe2ff 100644 --- a/Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml +++ b/Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/maxim,ds2760.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Maxim DS2760 DT bindings +title: Maxim DS2760 maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml index 070ef6f96e60d..711066b8cdb9e 100644 --- a/Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml +++ b/Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/maxim,max14656.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Maxim MAX14656 DT bindings +title: Maxim MAX14656 maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml index d12855e7ffd76..1df7c45d95c18 100644 --- a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml +++ b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rtc/epson,rx8900.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: EPSON RX8900 / Microcrystal RV8803 Real-Time Clock DT bindings +title: EPSON RX8900 / Microcrystal RV8803 Real-Time Clock maintainers: - Marek Vasut diff 
--git a/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml b/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml index b235b2441997f..af78b67b3da4d 100644 --- a/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rtc/ingenic,rtc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs Real-Time Clock DT bindings +title: Ingenic SoCs Real-Time Clock maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml b/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml index 2d4741f516639..f6e0c613af678 100644 --- a/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rtc/renesas,rzn1-rtc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Renesas RZ/N1 SoCs Real-Time Clock DT bindings +title: Renesas RZ/N1 SoCs Real-Time Clock maintainers: - Miquel Raynal diff --git a/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml b/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml index 6d176588df47d..89c462653e2d3 100644 --- a/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml +++ b/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/serial/brcm,bcm7271-uart.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom 8250 based serial port devicetree bindings +title: Broadcom 8250 based serial port maintainers: - Al Cooper diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml index 315ceb722e199..d5f153bdeb0d2 100644 --- a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml +++ b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/serial/ingenic,uart.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs UART controller devicetree bindings +title: Ingenic SoCs UART controller maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/serial/serial.yaml b/Documentation/devicetree/bindings/serial/serial.yaml index c75ba3fb64650..11e822bf09e2c 100644 --- a/Documentation/devicetree/bindings/serial/serial.yaml +++ b/Documentation/devicetree/bindings/serial/serial.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/serial/serial.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Serial Interface Generic DT Bindings +title: Serial Interface Generic maintainers: - Rob Herring diff --git a/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml b/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml index d911fa2d40ef9..f21eb907ee902 100644 --- a/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml +++ b/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/soc/mediatek/mtk-svs.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MediaTek Smart Voltage Scaling (SVS) Device Tree Bindings +title: MediaTek Smart Voltage Scaling (SVS) maintainers: - Roger Lu diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml index 422cbf38bfdbc..670b67ec0b617 100644 --- 
a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/cirrus,cs42l51.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: CS42L51 audio codec DT bindings +title: CS42L51 audio codec maintainers: - Olivier Moysan diff --git a/Documentation/devicetree/bindings/sound/ingenic,aic.yaml b/Documentation/devicetree/bindings/sound/ingenic,aic.yaml index ba44406c9cafc..c59a7cd9eaa9b 100644 --- a/Documentation/devicetree/bindings/sound/ingenic,aic.yaml +++ b/Documentation/devicetree/bindings/sound/ingenic,aic.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/ingenic,aic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs AC97 / I2S Controller (AIC) DT bindings +title: Ingenic SoCs AC97 / I2S Controller (AIC) maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/sound/ingenic,codec.yaml b/Documentation/devicetree/bindings/sound/ingenic,codec.yaml index a07d607e9b937..b58b90850e35a 100644 --- a/Documentation/devicetree/bindings/sound/ingenic,codec.yaml +++ b/Documentation/devicetree/bindings/sound/ingenic,codec.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/ingenic,codec.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic JZ47xx internal codec DT bindings +title: Ingenic JZ47xx internal codec maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml index 23564fd394a2c..79c6f8da1319c 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,lpass-rx-macro.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: LPASS(Low Power Audio Subsystem) RX Macro audio codec DT bindings +title: LPASS(Low Power Audio Subsystem) RX Macro audio codec maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml index 38708578ee290..66431aade3b70 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,lpass-tx-macro.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: LPASS(Low Power Audio Subsystem) TX Macro audio codec DT bindings +title: LPASS(Low Power Audio Subsystem) TX Macro audio codec maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml index 188883a2e6714..26f0343b5aac0 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,lpass-va-macro.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: LPASS(Low Power Audio Subsystem) VA Macro audio codec DT bindings +title: LPASS(Low Power Audio Subsystem) VA Macro audio codec maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml index 
bebca3e3f86fe..2bf8d082f8f10 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,lpass-wsa-macro.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: LPASS(Low Power Audio Subsystem) VA Macro audio codec DT bindings +title: LPASS(Low Power Audio Subsystem) VA Macro audio codec maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml b/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml index ea7d4900ee4a5..7dac9e6f7f08e 100644 --- a/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml +++ b/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/realtek,rt1015p.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Realtek rt1015p codec devicetree bindings +title: Realtek rt1015p codec maintainers: - Tzung-Bi Shih diff --git a/Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml b/Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml index e631ace7aad1f..ecfa7a5768663 100644 --- a/Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml +++ b/Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/realtek,rt5682s.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Realtek rt5682s codec devicetree bindings +title: Realtek rt5682s codec maintainers: - Derek Fang diff --git a/Documentation/devicetree/bindings/sound/ti,src4xxx.yaml b/Documentation/devicetree/bindings/sound/ti,src4xxx.yaml index 988ce8d8028fb..27230c682d10f 100644 --- a/Documentation/devicetree/bindings/sound/ti,src4xxx.yaml +++ b/Documentation/devicetree/bindings/sound/ti,src4xxx.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/ti,src4xxx.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments SRC4392 Device Tree Bindings +title: Texas Instruments SRC4392 description: | The SRC4392 is a digital audio codec that can be connected via diff --git a/Documentation/devicetree/bindings/spi/ingenic,spi.yaml b/Documentation/devicetree/bindings/spi/ingenic,spi.yaml index 360f76c226d92..c08d55b900bbc 100644 --- a/Documentation/devicetree/bindings/spi/ingenic,spi.yaml +++ b/Documentation/devicetree/bindings/spi/ingenic,spi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/spi/ingenic,spi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs SPI controller devicetree bindings +title: Ingenic SoCs SPI controller maintainers: - Artur Rojek diff --git a/Documentation/devicetree/bindings/spi/spi-gpio.yaml b/Documentation/devicetree/bindings/spi/spi-gpio.yaml index 0d0b6d9dad1ca..f29b89076c990 100644 --- a/Documentation/devicetree/bindings/spi/spi-gpio.yaml +++ b/Documentation/devicetree/bindings/spi/spi-gpio.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/spi/spi-gpio.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: SPI-GPIO devicetree bindings +title: SPI-GPIO maintainers: - Rob Herring diff --git a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml index a84fef0fe628f..2d14610888a71 100644 --- a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml +++ b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml @@ -4,7 +4,7 @@ $id: 
http://devicetree.org/schemas/timer/ingenic,tcu.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic SoCs Timer/Counter Unit (TCU) devicetree bindings +title: Ingenic SoCs Timer/Counter Unit (TCU) description: | For a description of the TCU hardware and drivers, have a look at diff --git a/Documentation/devicetree/bindings/usb/ingenic,musb.yaml b/Documentation/devicetree/bindings/usb/ingenic,musb.yaml index 59212358fcce4..4cc1496a913c2 100644 --- a/Documentation/devicetree/bindings/usb/ingenic,musb.yaml +++ b/Documentation/devicetree/bindings/usb/ingenic,musb.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/usb/ingenic,musb.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic JZ47xx USB IP DT bindings +title: Ingenic JZ47xx USB IP maintainers: - Paul Cercueil diff --git a/Documentation/devicetree/bindings/usb/maxim,max33359.yaml b/Documentation/devicetree/bindings/usb/maxim,max33359.yaml index 93a19eda610b9..8e513a6af3789 100644 --- a/Documentation/devicetree/bindings/usb/maxim,max33359.yaml +++ b/Documentation/devicetree/bindings/usb/maxim,max33359.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/usb/maxim,max33359.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Maxim TCPCI Type-C PD controller DT bindings +title: Maxim TCPCI Type-C PD controller maintainers: - Badhri Jagan Sridharan diff --git a/Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml b/Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml index 8db1f8b597c34..c72257c19220f 100644 --- a/Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml +++ b/Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/usb/mediatek,mt6360-tcpc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Mediatek MT6360 Type-C Port Switch and Power Delivery controller DT bindings +title: Mediatek MT6360 Type-C Port Switch and Power Delivery controller maintainers: - ChiYuan Huang diff --git a/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml b/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml index a4c53b1f1af35..fef4acdc4773b 100644 --- a/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml +++ b/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/usb/ti,tps6598x.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Texas Instruments 6598x Type-C Port Switch and Power Delivery controller DT bindings +title: Texas Instruments 6598x Type-C Port Switch and Power Delivery controller maintainers: - Bryan O'Donoghue diff --git a/Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml b/Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml index 5aa4ffd671191..937670de01cc2 100644 --- a/Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml +++ b/Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/usb/willsemi,wusb3801.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: WUSB3801 Type-C port controller DT bindings +title: WUSB3801 Type-C port controller description: The Will Semiconductor WUSB3801 is a USB Type-C port controller which -- GitLab From 9fa3ad1a1ab31da4887b48e68ad529af78f119b6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Dec 2022 17:38:11 +0100 Subject: [PATCH 673/694] dt-bindings: drop redundant part of title (end, part two) The Devicetree bindings 
document does not have to say in the title that it is a "binding", but
instead just describe the hardware.

Drop trailing "Node|Tree|Generic bindings" in various forms (also with
trailing full stop):

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [nN]ode [bB]indings\?\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [tT]ree [bB]indings\?\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [gG]eneric [bB]indings\?\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [bB]indings\? description\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [bB]indings\? document\.\?$/title: \1/' {} \;

Signed-off-by: Krzysztof Kozlowski
Acked-by: Alexandre Belloni
Reviewed-by: Jonathan Cameron
Acked-by: Ulf Hansson # MMC
Acked-by: Stephen Boyd # clk
Acked-by: Vinod Koul # phy
Acked-by: Mark Brown
Link: https://lore.kernel.org/r/20221216163815.522628-6-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring
---
 Documentation/devicetree/bindings/clock/ti,sci-clk.yaml         | 2 +-
 Documentation/devicetree/bindings/cpu/idle-states.yaml          | 2 +-
 .../devicetree/bindings/net/dsa/microchip,lan937x.yaml          | 2 +-
 .../devicetree/bindings/net/wireless/mediatek,mt76.yaml         | 2 +-
 Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml   | 2 +-
 Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml | 2 +-
 Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml    | 2 +-
 Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml      | 2 +-
 Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml      | 2 +-
 Documentation/devicetree/bindings/power/domain-idle-state.yaml  | 2 +-
 .../devicetree/bindings/reserved-memory/shared-dma-pool.yaml    | 2 +-
 Documentation/devicetree/bindings/reset/ti,sci-reset.yaml       | 2 +-
 Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml   | 2 +-
 Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml     | 2 +-
 Documentation/devicetree/bindings/sound/audio-graph-port.yaml   | 2 +-
 15 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml b/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
index 0e370289a0530..63d9763416965 100644
--- a/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/ti,sci-clk.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#

-title: TI-SCI clock controller node bindings
+title: TI-SCI clock controller

 maintainers:
   - Nishanth Menon
diff --git a/Documentation/devicetree/bindings/cpu/idle-states.yaml b/Documentation/devicetree/bindings/cpu/idle-states.yaml
index fa4d4142ac937..b8cc826c95013 100644
--- a/Documentation/devicetree/bindings/cpu/idle-states.yaml
+++ b/Documentation/devicetree/bindings/cpu/idle-states.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/cpu/idle-states.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#

-title: Idle states binding description
+title: Idle states

 maintainers:
   - Lorenzo Pieralisi
diff --git
a/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml index 630bf0f8294b2..b34de303966ba 100644 --- a/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml +++ b/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/dsa/microchip,lan937x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: LAN937x Ethernet Switch Series Tree Bindings +title: LAN937x Ethernet Switch Series maintainers: - UNGLinuxDriver@microchip.com diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml index 70e328589cfb8..f0c78f9944913 100644 --- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml +++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml @@ -6,7 +6,7 @@ $id: http://devicetree.org/schemas/net/wireless/mediatek,mt76.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MediaTek mt76 wireless devices Generic Binding +title: MediaTek mt76 wireless devices maintainers: - Felix Fietkau diff --git a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml index 7029cb1f38ffb..0e5412cff2bc6 100644 --- a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/wireless/qca,ath9k.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Atheros ath9k wireless devices Generic Binding +title: Qualcomm Atheros ath9k wireless devices maintainers: - Toke Høiland-Jørgensen diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml index f7cf135aa37ff..556eb523606a3 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml @@ -6,7 +6,7 @@ $id: http://devicetree.org/schemas/net/wireless/qcom,ath11k.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies ath11k wireless devices Generic Binding +title: Qualcomm Technologies ath11k wireless devices maintainers: - Kalle Valo diff --git a/Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml index 70eb48b391c9b..527010702f5e7 100644 --- a/Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/brcm,ns2-pcie-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom NS2 PCIe PHY binding document +title: Broadcom NS2 PCIe PHY maintainers: - Ray Jui diff --git a/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml index 0655e485b2604..aa97478dd0161 100644 --- a/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/qcom,usb-hs-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm's USB HS PHY binding description +title: Qualcomm's USB HS PHY maintainers: - Bjorn Andersson diff --git 
a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml index da7cac537e15c..7bf5c42b2848c 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml @@ -5,7 +5,7 @@ $id: "http://devicetree.org/schemas/phy/ti,phy-gmii-sel.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: CPSW Port's Interface Mode Selection PHY Tree Bindings +title: CPSW Port's Interface Mode Selection PHY maintainers: - Kishon Vijay Abraham I diff --git a/Documentation/devicetree/bindings/power/domain-idle-state.yaml b/Documentation/devicetree/bindings/power/domain-idle-state.yaml index 4ee920a1de69e..ec1f6f669e50b 100644 --- a/Documentation/devicetree/bindings/power/domain-idle-state.yaml +++ b/Documentation/devicetree/bindings/power/domain-idle-state.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/domain-idle-state.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: PM Domain Idle States binding description +title: PM Domain Idle States maintainers: - Ulf Hansson diff --git a/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml b/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml index 618105f079bee..47696073b6658 100644 --- a/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml +++ b/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/reserved-memory/shared-dma-pool.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: /reserved-memory DMA pool node bindings +title: /reserved-memory DMA pool maintainers: - devicetree-spec@vger.kernel.org diff --git a/Documentation/devicetree/bindings/reset/ti,sci-reset.yaml b/Documentation/devicetree/bindings/reset/ti,sci-reset.yaml index 4639d2cec5575..dcf9206e12bec 100644 --- a/Documentation/devicetree/bindings/reset/ti,sci-reset.yaml +++ b/Documentation/devicetree/bindings/reset/ti,sci-reset.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/reset/ti,sci-reset.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: TI-SCI reset controller node bindings +title: TI-SCI reset controller maintainers: - Nishanth Menon diff --git a/Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml b/Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml index afc835eda0efe..f436f2cf1df71 100644 --- a/Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml +++ b/Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/reset/ti,tps380x-reset.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: TI TPS380x reset controller node bindings +title: TI TPS380x reset controller maintainers: - Marco Felsch diff --git a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml index 9e6cb4ee9755d..5df7688a1e1ce 100644 --- a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml +++ b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/soc/ti/sci-pm-domain.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: TI-SCI generic power domain node bindings +title: TI-SCI generic power domain maintainers: - Nishanth Menon diff --git a/Documentation/devicetree/bindings/sound/audio-graph-port.yaml 
b/Documentation/devicetree/bindings/sound/audio-graph-port.yaml index 64654ceef2089..f5b8b6d13077c 100644 --- a/Documentation/devicetree/bindings/sound/audio-graph-port.yaml +++ b/Documentation/devicetree/bindings/sound/audio-graph-port.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/audio-graph-port.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Audio Graph Card 'port' Node Bindings +title: Audio Graph Card 'port' maintainers: - Kuninori Morimoto -- GitLab From 84e85359f4999a439aa12e04bf0ae9e13e00fc66 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Dec 2022 17:38:12 +0100 Subject: [PATCH 674/694] dt-bindings: drop redundant part of title (end, part three) The Devicetree bindings document does not have to say in the title that it is a "binding", but instead just describe the hardware. Drop trailing "bindings" in various forms (also with trailing full stop): find Documentation/devicetree/bindings/ -type f -name '*.yaml' \ -not -name 'trivial-devices.yaml' \ -exec sed -i -e 's/^title: \(.*\) [bB]indings\?\.\?$/title: \1/' {} \; Signed-off-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Acked-by: Matti Vaittinen # ROHM Acked-by: Ulf Hansson # MMC Acked-by: Stephen Boyd # clk Acked-by: Dmitry Torokhov # input Acked-by: Mark Brown Acked-by: Hans Verkuil # media Acked-by: Sebastian Reichel # power Acked-by: Viresh Kumar # cpufreq Link: https://lore.kernel.org/r/20221216163815.522628-7-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/arm/cci-control-port.yaml | 2 +- Documentation/devicetree/bindings/arm/cpus.yaml | 2 +- .../devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml | 2 +- Documentation/devicetree/bindings/arm/sp810.yaml | 2 +- Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml | 2 +- .../devicetree/bindings/arm/stm32/st,stm32-syscon.yaml | 2 +- .../devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml | 2 +- .../devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml | 2 +- Documentation/devicetree/bindings/arm/vexpress-config.yaml | 2 +- Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml | 2 +- .../devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml | 2 +- .../devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml | 2 +- Documentation/devicetree/bindings/bus/ti-sysc.yaml | 2 +- Documentation/devicetree/bindings/clock/fsl,plldig.yaml | 2 +- Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx8m-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml | 2 +- Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx93-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/intel,agilex.yaml | 2 +- Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml | 2 +- Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml | 2 +- Documentation/devicetree/bindings/clock/intel,stratix10.yaml | 2 +- .../devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml | 2 +- Documentation/devicetree/bindings/clock/milbeaut-clock.yaml | 2 +- .../devicetree/bindings/clock/nuvoton,npcm845-clk.yaml | 2 +- Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml | 2 +- .../devicetree/bindings/clock/rockchip,rk3568-cru.yaml | 2 +- .../devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml | 2 +- .../devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml | 2 +- 
Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml | 2 +- Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml | 2 +- Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml | 2 +- Documentation/devicetree/bindings/display/arm,hdlcd.yaml | 2 +- Documentation/devicetree/bindings/display/arm,malidp.yaml | 2 +- .../devicetree/bindings/display/bridge/toshiba,tc358767.yaml | 2 +- .../devicetree/bindings/display/bridge/toshiba,tc358775.yaml | 2 +- .../devicetree/bindings/display/panel/display-timings.yaml | 2 +- .../devicetree/bindings/display/panel/panel-timing.yaml | 2 +- Documentation/devicetree/bindings/dma/st,stm32-dma.yaml | 2 +- Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml | 2 +- Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml | 2 +- Documentation/devicetree/bindings/edac/dmc-520.yaml | 2 +- Documentation/devicetree/bindings/firmware/arm,scmi.yaml | 2 +- Documentation/devicetree/bindings/firmware/arm,scpi.yaml | 2 +- .../devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml | 2 +- Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml | 2 +- Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml | 2 +- Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml | 2 +- Documentation/devicetree/bindings/gpu/vivante,gc.yaml | 2 +- .../devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml | 2 +- Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml | 2 +- Documentation/devicetree/bindings/i2c/i2c-pxa.yaml | 2 +- Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml | 2 +- Documentation/devicetree/bindings/i3c/i3c.yaml | 2 +- Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml | 2 +- .../devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml | 2 +- Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml | 2 +- Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml | 2 +- Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml | 2 +- Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml | 2 +- .../devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml | 2 +- Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml | 2 +- Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml | 2 +- .../devicetree/bindings/iio/multiplexer/io-channel-mux.yaml | 2 +- .../bindings/input/touchscreen/cypress,cy8ctma140.yaml | 2 +- .../bindings/input/touchscreen/cypress,cy8ctma340.yaml | 2 +- .../devicetree/bindings/input/touchscreen/edt-ft5x06.yaml | 2 +- Documentation/devicetree/bindings/input/touchscreen/goodix.yaml | 2 +- .../devicetree/bindings/input/touchscreen/himax,hx83112b.yaml | 2 +- .../devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml | 2 +- .../devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml | 2 +- .../devicetree/bindings/input/touchscreen/melfas,mms114.yaml | 2 +- .../devicetree/bindings/input/touchscreen/mstar,msg2638.yaml | 2 +- .../devicetree/bindings/input/touchscreen/ti,tsc2005.yaml | 2 +- .../devicetree/bindings/input/touchscreen/touchscreen.yaml | 2 +- .../devicetree/bindings/input/touchscreen/zinitix,bt400.yaml | 2 +- .../devicetree/bindings/interrupt-controller/mrvl,intc.yaml | 2 +- .../bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml | 2 +- Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml | 2 +- Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml | 2 +- .../devicetree/bindings/leds/backlight/gpio-backlight.yaml | 2 +- .../devicetree/bindings/leds/backlight/led-backlight.yaml | 2 +- .../devicetree/bindings/leds/backlight/pwm-backlight.yaml | 2 +- 
.../devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml | 2 +- Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml | 2 +- Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml | 2 +- Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml | 2 +- Documentation/devicetree/bindings/media/renesas,ceu.yaml | 2 +- Documentation/devicetree/bindings/media/st,stm32-cec.yaml | 2 +- Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml | 2 +- Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml | 2 +- .../bindings/memory-controllers/calxeda-ddr-ctrlr.yaml | 2 +- .../bindings/memory-controllers/st,stm32-fmc2-ebi.yaml | 2 +- Documentation/devicetree/bindings/mfd/actions,atc260x.yaml | 2 +- Documentation/devicetree/bindings/mfd/ene-kb3930.yaml | 2 +- Documentation/devicetree/bindings/mfd/ene-kb930.yaml | 2 +- Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml | 2 +- Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml | 2 +- Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml | 2 +- Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml | 2 +- Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml | 2 +- Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml | 2 +- Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml | 2 +- Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml | 2 +- Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml | 2 +- Documentation/devicetree/bindings/mfd/st,stmfx.yaml | 2 +- Documentation/devicetree/bindings/mfd/st,stpmic1.yaml | 2 +- Documentation/devicetree/bindings/mips/cpus.yaml | 2 +- Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml | 2 +- Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml | 2 +- .../devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml | 2 +- Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml | 2 +- Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml | 2 +- Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml | 2 +- Documentation/devicetree/bindings/mmc/mtk-sd.yaml | 2 +- Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml | 2 +- Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml | 2 +- Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml | 2 +- Documentation/devicetree/bindings/mtd/gpmi-nand.yaml | 2 +- Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml | 2 +- Documentation/devicetree/bindings/mtd/mxc-nand.yaml | 2 +- .../devicetree/bindings/mtd/partitions/qcom,smem-part.yaml | 2 +- Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml | 2 +- Documentation/devicetree/bindings/mux/gpio-mux.yaml | 2 +- Documentation/devicetree/bindings/mux/mux-consumer.yaml | 2 +- Documentation/devicetree/bindings/mux/mux-controller.yaml | 2 +- Documentation/devicetree/bindings/mux/reg-mux.yaml | 2 +- Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml | 2 +- Documentation/devicetree/bindings/net/can/bosch,m_can.yaml | 2 +- Documentation/devicetree/bindings/net/can/can-transceiver.yaml | 2 +- Documentation/devicetree/bindings/net/engleder,tsnep.yaml | 2 +- Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml | 2 +- Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml | 2 +- Documentation/devicetree/bindings/net/wireless/ieee80211.yaml | 2 +- .../devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml | 2 +- Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml | 2 +- .../devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml | 2 +- 
Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml | 2 +- Documentation/devicetree/bindings/opp/opp-v1.yaml | 2 +- Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml | 2 +- Documentation/devicetree/bindings/opp/opp-v2.yaml | 2 +- Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml | 2 +- Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml | 2 +- Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml | 2 +- .../devicetree/bindings/phy/intel,keembay-phy-usb.yaml | 2 +- .../devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml | 2 +- Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml | 2 +- Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml | 2 +- Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml | 2 +- Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml | 2 +- Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml | 2 +- Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml | 2 +- Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml | 2 +- Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml | 2 +- Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml | 2 +- Documentation/devicetree/bindings/phy/transmit-amplitude.yaml | 2 +- Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml | 2 +- Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml | 2 +- .../devicetree/bindings/power/supply/dlg,da9150-charger.yaml | 2 +- .../devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml | 2 +- .../devicetree/bindings/power/supply/ingenic,battery.yaml | 2 +- .../devicetree/bindings/power/supply/lltc,lt3651-charger.yaml | 2 +- .../devicetree/bindings/power/supply/sc2731-charger.yaml | 2 +- Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml | 2 +- Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml | 2 +- .../devicetree/bindings/regulator/st,stm32-booster.yaml | 2 +- .../devicetree/bindings/regulator/st,stm32-vrefbuf.yaml | 2 +- .../devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml | 2 +- Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml | 2 +- Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml | 2 +- Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml | 2 +- Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml | 2 +- Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml | 2 +- .../devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml | 2 +- .../devicetree/bindings/remoteproc/st,stm32-rproc.yaml | 2 +- Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml | 2 +- .../devicetree/bindings/rng/silex-insight,ba431-rng.yaml | 2 +- Documentation/devicetree/bindings/rng/st,stm32-rng.yaml | 2 +- .../devicetree/bindings/rng/xiphera,xip8001b-trng.yaml | 2 +- Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml | 2 +- Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml | 2 +- Documentation/devicetree/bindings/serial/8250.yaml | 2 +- Documentation/devicetree/bindings/serial/rs485.yaml | 2 +- Documentation/devicetree/bindings/serial/st,stm32-uart.yaml | 2 +- Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml | 2 +- Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml | 2 +- Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml | 2 +- Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml | 2 +- Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml | 2 +- Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml | 2 +- Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml | 
2 +- Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml | 2 +- Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml | 2 +- .../devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml | 2 +- .../devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml | 2 +- Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml | 2 +- Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml | 2 +- Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml | 2 +- Documentation/devicetree/bindings/spi/st,stm32-spi.yaml | 2 +- Documentation/devicetree/bindings/thermal/imx-thermal.yaml | 2 +- Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml | 2 +- Documentation/devicetree/bindings/thermal/sprd-thermal.yaml | 2 +- Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml | 2 +- .../devicetree/bindings/thermal/thermal-cooling-devices.yaml | 2 +- Documentation/devicetree/bindings/thermal/thermal-idle.yaml | 2 +- Documentation/devicetree/bindings/thermal/thermal-sensor.yaml | 2 +- Documentation/devicetree/bindings/thermal/thermal-zones.yaml | 2 +- Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml | 2 +- Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml | 2 +- Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml | 2 +- Documentation/devicetree/bindings/timer/st,stm32-timer.yaml | 2 +- Documentation/devicetree/bindings/usb/analogix,anx7411.yaml | 2 +- Documentation/devicetree/bindings/usb/cdns,usb3.yaml | 2 +- Documentation/devicetree/bindings/usb/dwc2.yaml | 2 +- Documentation/devicetree/bindings/usb/faraday,fotg210.yaml | 2 +- Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml | 2 +- Documentation/devicetree/bindings/usb/nxp,isp1760.yaml | 2 +- Documentation/devicetree/bindings/usb/richtek,rt1719.yaml | 2 +- Documentation/devicetree/bindings/usb/st,stusb160x.yaml | 2 +- Documentation/devicetree/bindings/virtio/virtio-device.yaml | 2 +- Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml | 2 +- 223 files changed, 223 insertions(+), 223 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/cci-control-port.yaml b/Documentation/devicetree/bindings/arm/cci-control-port.yaml index c9114866213f3..c29d250a6d772 100644 --- a/Documentation/devicetree/bindings/arm/cci-control-port.yaml +++ b/Documentation/devicetree/bindings/arm/cci-control-port.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/cci-control-port.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: CCI Interconnect Bus Masters binding +title: CCI Interconnect Bus Masters maintainers: - Lorenzo Pieralisi diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 7dd84f8f8e4fb..01b5a9c689a29 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/cpus.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ARM CPUs bindings +title: ARM CPUs maintainers: - Lorenzo Pieralisi diff --git a/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml b/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml index 5cbcacaeb441e..ff378d5cbd32a 100644 --- a/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml +++ b/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/keystone/ti,k3-sci-common.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# 
-title: Common K3 TI-SCI bindings +title: Common K3 TI-SCI maintainers: - Nishanth Menon diff --git a/Documentation/devicetree/bindings/arm/sp810.yaml b/Documentation/devicetree/bindings/arm/sp810.yaml index bc8e524aa90ad..c9094e5ec5657 100644 --- a/Documentation/devicetree/bindings/arm/sp810.yaml +++ b/Documentation/devicetree/bindings/arm/sp810.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/sp810.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ARM Versatile Express SP810 System Controller bindings +title: ARM Versatile Express SP810 System Controller maintainers: - Andre Przywara diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml index ecb28e90fd115..2297ad3f4774c 100644 --- a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml +++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/arm/stm32/st,mlahb.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: STMicroelectronics STM32 ML-AHB interconnect bindings +title: STMicroelectronics STM32 ML-AHB interconnect maintainers: - Fabien Dessenne diff --git a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml index 6f846d69c5e16..b2b156cc160ae 100644 --- a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml +++ b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/arm/stm32/st,stm32-syscon.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: STMicroelectronics STM32 Platforms System Controller bindings +title: STMicroelectronics STM32 Platforms System Controller maintainers: - Alexandre Torgue diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml index debb2b0c80137..dd3a4770c6a1f 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-cbb.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: NVIDIA Tegra194 CBB 1.0 bindings +title: NVIDIA Tegra194 CBB 1.0 maintainers: - Sumit Gupta diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml index 7b1fe50ffbe0b..44184ee01449f 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra234-cbb.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: NVIDIA Tegra CBB 2.0 bindings +title: NVIDIA Tegra CBB 2.0 maintainers: - Sumit Gupta diff --git a/Documentation/devicetree/bindings/arm/vexpress-config.yaml b/Documentation/devicetree/bindings/arm/vexpress-config.yaml index 09e1adf5ca7a0..b74380da3198b 100644 --- a/Documentation/devicetree/bindings/arm/vexpress-config.yaml +++ b/Documentation/devicetree/bindings/arm/vexpress-config.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/vexpress-config.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ARM Versatile Express configuration bus bindings +title: ARM Versatile Express configuration 
bus maintainers: - Andre Przywara diff --git a/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml b/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml index f04db802a7321..be6e3b5425690 100644 --- a/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml +++ b/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/arm/vexpress-sysreg.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ARM Versatile Express system registers bindings +title: ARM Versatile Express system registers maintainers: - Andre Przywara diff --git a/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml b/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml index cb530b46beff5..2011bd03cdcd9 100644 --- a/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml +++ b/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/ata/allwinner,sun4i-a10-ahci.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Allwinner A10 AHCI SATA Controller bindings +title: Allwinner A10 AHCI SATA Controller maintainers: - Chen-Yu Tsai diff --git a/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml b/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml index e6b42a113ff17..a2afe2ad60638 100644 --- a/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml +++ b/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/ata/allwinner,sun8i-r40-ahci.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Allwinner R40 AHCI SATA Controller bindings +title: Allwinner R40 AHCI SATA Controller maintainers: - Chen-Yu Tsai diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.yaml b/Documentation/devicetree/bindings/bus/ti-sysc.yaml index fced4082b047b..f089634f9466a 100644 --- a/Documentation/devicetree/bindings/bus/ti-sysc.yaml +++ b/Documentation/devicetree/bindings/bus/ti-sysc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/bus/ti-sysc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments interconnect target module binding +title: Texas Instruments interconnect target module maintainers: - Tony Lindgren diff --git a/Documentation/devicetree/bindings/clock/fsl,plldig.yaml b/Documentation/devicetree/bindings/clock/fsl,plldig.yaml index 9ac716dfa602f..88dd9c18db922 100644 --- a/Documentation/devicetree/bindings/clock/fsl,plldig.yaml +++ b/Documentation/devicetree/bindings/clock/fsl,plldig.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/fsl,plldig.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP QorIQ Layerscape LS1028A Display PIXEL Clock Binding +title: NXP QorIQ Layerscape LS1028A Display PIXEL Clock maintainers: - Wen He diff --git a/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml b/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml index fc3bdfdc091ab..3bca9d11c148f 100644 --- a/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml +++ b/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/fsl,sai-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale SAI bitclock-as-a-clock binding +title: Freescale SAI bitclock-as-a-clock maintainers: - Michael Walle diff --git a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml 
b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml index 458c7645ee683..e4c4cadec501a 100644 --- a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx8m-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP i.MX8M Family Clock Control Module Binding +title: NXP i.MX8M Family Clock Control Module maintainers: - Anson Huang diff --git a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml index cb80105b3c708..b207f95361b20 100644 --- a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml +++ b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx8qxp-lpcg.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP i.MX8QXP LPCG (Low-Power Clock Gating) Clock bindings +title: NXP i.MX8QXP LPCG (Low-Power Clock Gating) Clock maintainers: - Aisheng Dong diff --git a/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml b/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml index 71f7186b135ba..68a60cdc19af3 100644 --- a/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx8ulp-cgc-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP i.MX8ULP Clock Generation & Control(CGC) Module Binding +title: NXP i.MX8ULP Clock Generation & Control(CGC) Module maintainers: - Jacky Bai diff --git a/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml b/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml index 00612725bf8b1..d0b0792fe7bab 100644 --- a/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx8ulp-pcc-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP i.MX8ULP Peripheral Clock Controller(PCC) Module Binding +title: NXP i.MX8ULP Peripheral Clock Controller(PCC) Module maintainers: - Jacky Bai diff --git a/Documentation/devicetree/bindings/clock/imx93-clock.yaml b/Documentation/devicetree/bindings/clock/imx93-clock.yaml index 21a06194e4a3a..ccb53c6b96c11 100644 --- a/Documentation/devicetree/bindings/clock/imx93-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx93-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx93-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP i.MX93 Clock Control Module Binding +title: NXP i.MX93 Clock Control Module maintainers: - Peng Fan diff --git a/Documentation/devicetree/bindings/clock/intel,agilex.yaml b/Documentation/devicetree/bindings/clock/intel,agilex.yaml index cf5a9eb803e62..3745ba8dbd763 100644 --- a/Documentation/devicetree/bindings/clock/intel,agilex.yaml +++ b/Documentation/devicetree/bindings/clock/intel,agilex.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/intel,agilex.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Intel SoCFPGA Agilex platform clock controller binding +title: Intel SoCFPGA Agilex platform clock controller maintainers: - Dinh Nguyen diff --git a/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml b/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml index 
f3e1a700a2ca5..76609a390429a 100644 --- a/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml +++ b/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/intel,cgu-lgm.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Intel Lightning Mountain SoC's Clock Controller(CGU) Binding +title: Intel Lightning Mountain SoC's Clock Controller(CGU) maintainers: - Rahul Tanwar diff --git a/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml b/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml index 8f45976e946ee..e000116a51a44 100644 --- a/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml +++ b/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/intel,easic-n5x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Intel SoCFPGA eASIC N5X platform clock controller binding +title: Intel SoCFPGA eASIC N5X platform clock controller maintainers: - Dinh Nguyen diff --git a/Documentation/devicetree/bindings/clock/intel,stratix10.yaml b/Documentation/devicetree/bindings/clock/intel,stratix10.yaml index f506e3db9782f..b4a8be2134001 100644 --- a/Documentation/devicetree/bindings/clock/intel,stratix10.yaml +++ b/Documentation/devicetree/bindings/clock/intel,stratix10.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/intel,stratix10.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Intel SoCFPGA Stratix10 platform clock controller binding +title: Intel SoCFPGA Stratix10 platform clock controller maintainers: - Dinh Nguyen diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml index b2ce78722247c..e4e1c31267d2a 100644 --- a/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml +++ b/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/microchip,mpfs-clkcfg.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Microchip PolarFire Clock Control Module Binding +title: Microchip PolarFire Clock Control Module maintainers: - Daire McNamara diff --git a/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml b/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml index 6d39344d2b705..0af1c569eb328 100644 --- a/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml +++ b/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/milbeaut-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Milbeaut SoCs Clock Controller Binding +title: Milbeaut SoCs Clock Controller maintainers: - Taichi Sugaya diff --git a/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml b/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml index 771db2ddf0269..b901ca13cd25d 100644 --- a/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml +++ b/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/nuvoton,npcm845-clk.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Nuvoton NPCM8XX Clock Controller Binding +title: Nuvoton NPCM8XX Clock Controller maintainers: - Tomer Maimon diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml index 
fccb91e78e499..cf25ba0419e21 100644 --- a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/qcom,rpmhcc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies, Inc. RPMh Clocks Bindings +title: Qualcomm Technologies, Inc. RPMh Clocks maintainers: - Taniya Das diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml index fc7546f521c55..f809c289445e6 100644 --- a/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml +++ b/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/rockchip,rk3568-cru.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ROCKCHIP rk3568 Family Clock Control Module Binding +title: ROCKCHIP rk3568 Family Clock Control Module maintainers: - Elaine Zhang diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml index 9cd42a64b13ef..d0aecde2b89b1 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/cpufreq/cpufreq-mediatek-hw.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MediaTek's CPUFREQ Bindings +title: MediaTek's CPUFREQ maintainers: - Hector Yuan diff --git a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml index 3c00ad09eeaab..9c086eac6ca71 100644 --- a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml +++ b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/cpufreq/qcom-cpufreq-nvmem.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies, Inc. NVMEM CPUFreq bindings +title: Qualcomm Technologies, Inc. 
NVMEM CPUFreq maintainers: - Ilia Lin diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml index b72e4858f9aab..50b2c2e0c3cd8 100644 --- a/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml +++ b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/crypto/st,stm32-crc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 CRC bindings +title: STMicroelectronics STM32 CRC maintainers: - Lionel Debieve diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml index 6759c5bf3e572..0ddeb8a9a7a01 100644 --- a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml +++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/crypto/st,stm32-cryp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 CRYP bindings +title: STMicroelectronics STM32 CRYP description: The STM32 CRYP block is built on the CRYP block found in the STn8820 SoC introduced in 2007, and subsequently used in the U8500 diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml index 10ba94792d951..4ccb335e8063c 100644 --- a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml +++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/crypto/st,stm32-hash.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 HASH bindings +title: STMicroelectronics STM32 HASH maintainers: - Lionel Debieve diff --git a/Documentation/devicetree/bindings/display/arm,hdlcd.yaml b/Documentation/devicetree/bindings/display/arm,hdlcd.yaml index a2670258c48d3..9a30e9005e8a2 100644 --- a/Documentation/devicetree/bindings/display/arm,hdlcd.yaml +++ b/Documentation/devicetree/bindings/display/arm,hdlcd.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/arm,hdlcd.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Arm HDLCD display controller binding +title: Arm HDLCD display controller maintainers: - Liviu Dudau diff --git a/Documentation/devicetree/bindings/display/arm,malidp.yaml b/Documentation/devicetree/bindings/display/arm,malidp.yaml index 2a17ec6fc97c0..91812573fd080 100644 --- a/Documentation/devicetree/bindings/display/arm,malidp.yaml +++ b/Documentation/devicetree/bindings/display/arm,malidp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/arm,malidp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Arm Mali Display Processor (Mali-DP) binding +title: Arm Mali Display Processor (Mali-DP) maintainers: - Liviu Dudau diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml index ed280053ec62b..140927884418f 100644 --- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/bridge/toshiba,tc358767.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Toshiba TC358767 eDP bridge bindings +title: Toshiba TC358767 eDP bridge maintainers: - Andrey Gusakov diff --git 
a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml index 10471c6c1ff91..d879c700594a6 100644 --- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/bridge/toshiba,tc358775.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Toshiba TC358775 DSI to LVDS bridge bindings +title: Toshiba TC358775 DSI to LVDS bridge maintainers: - Vinay Simha BN diff --git a/Documentation/devicetree/bindings/display/panel/display-timings.yaml b/Documentation/devicetree/bindings/display/panel/display-timings.yaml index 6d30575819d3b..dc5f7e36e30bc 100644 --- a/Documentation/devicetree/bindings/display/panel/display-timings.yaml +++ b/Documentation/devicetree/bindings/display/panel/display-timings.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/panel/display-timings.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: display timings bindings +title: display timings maintainers: - Thierry Reding diff --git a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml index 229e3b36ee29a..0d317e61edd8f 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/panel/panel-timing.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: panel timing bindings +title: panel timing maintainers: - Thierry Reding diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml index 55faab6a468eb..158c791d7caa6 100644 --- a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml +++ b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/dma/st,stm32-dma.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 DMA Controller bindings +title: STMicroelectronics STM32 DMA Controller description: | The STM32 DMA is a general-purpose direct memory access controller capable of diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml index 1e1d8549b7ef4..3e0b82d277cac 100644 --- a/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml +++ b/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/dma/st,stm32-dmamux.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 DMA MUX (DMA request router) bindings +title: STMicroelectronics STM32 DMA MUX (DMA request router) maintainers: - Amelie Delaunay diff --git a/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml index 00cfa39136521..08a59bd69a2f4 100644 --- a/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml +++ b/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/dma/st,stm32-mdma.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 MDMA Controller bindings +title: STMicroelectronics STM32 MDMA Controller description: | The STM32 MDMA is a general-purpose direct 
memory access controller capable of diff --git a/Documentation/devicetree/bindings/edac/dmc-520.yaml b/Documentation/devicetree/bindings/edac/dmc-520.yaml index 3b6842e92d1b9..84db3966662ac 100644 --- a/Documentation/devicetree/bindings/edac/dmc-520.yaml +++ b/Documentation/devicetree/bindings/edac/dmc-520.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/edac/dmc-520.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ARM DMC-520 EDAC bindings +title: ARM DMC-520 EDAC maintainers: - Lei Wang diff --git a/Documentation/devicetree/bindings/firmware/arm,scmi.yaml b/Documentation/devicetree/bindings/firmware/arm,scmi.yaml index 1c0388da6721a..176796931a22f 100644 --- a/Documentation/devicetree/bindings/firmware/arm,scmi.yaml +++ b/Documentation/devicetree/bindings/firmware/arm,scmi.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/firmware/arm,scmi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: System Control and Management Interface (SCMI) Message Protocol bindings +title: System Control and Management Interface (SCMI) Message Protocol maintainers: - Sudeep Holla diff --git a/Documentation/devicetree/bindings/firmware/arm,scpi.yaml b/Documentation/devicetree/bindings/firmware/arm,scpi.yaml index 1f9322925e7c9..241317239ffcf 100644 --- a/Documentation/devicetree/bindings/firmware/arm,scpi.yaml +++ b/Documentation/devicetree/bindings/firmware/arm,scpi.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/firmware/arm,scpi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: System Control and Power Interface (SCPI) Message Protocol bindings +title: System Control and Power Interface (SCPI) Message Protocol maintainers: - Sudeep Holla diff --git a/Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml b/Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml index fcf0011b8e6db..3faae32366656 100644 --- a/Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml +++ b/Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/firmware/qemu,fw-cfg-mmio.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: QEMU Firmware Configuration bindings +title: QEMU Firmware Configuration maintainers: - Rob Herring diff --git a/Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml b/Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml index cb8a5c376e1e2..157969bc4c463 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml +++ b/Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/gpio/gpio-tpic2810.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: TPIC2810 GPIO controller bindings +title: TPIC2810 GPIO controller maintainers: - Aswath Govindraju diff --git a/Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml b/Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml index 7087e4a5013f6..bd721c8390592 100644 --- a/Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/gpio/ti,omap-gpio.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: OMAP GPIO controller bindings +title: OMAP GPIO controller maintainers: - Grygorii Strashko diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml index 217c42874f418..dae55b8a267b0 100644 --- 
a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml +++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/gpu/brcm,bcm-v3d.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom V3D GPU Bindings +title: Broadcom V3D GPU maintainers: - Eric Anholt diff --git a/Documentation/devicetree/bindings/gpu/vivante,gc.yaml b/Documentation/devicetree/bindings/gpu/vivante,gc.yaml index 93e7244cdc0eb..b1b10ea70ad9a 100644 --- a/Documentation/devicetree/bindings/gpu/vivante,gc.yaml +++ b/Documentation/devicetree/bindings/gpu/vivante,gc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/gpu/vivante,gc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Vivante GPU Bindings +title: Vivante GPU description: Vivante GPU core devices diff --git a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml index b18c616035a8b..829d1fdf4c674 100644 --- a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml +++ b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/hwlock/st,stm32-hwspinlock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Hardware Spinlock bindings +title: STMicroelectronics STM32 Hardware Spinlock maintainers: - Fabien Dessenne diff --git a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml index d0d5497492087..ae4f68d4e6966 100644 --- a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml +++ b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/hwmon/moortec,mr75203.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Moortec Semiconductor MR75203 PVT Controller bindings +title: Moortec Semiconductor MR75203 PVT Controller maintainers: - Rahul Tanwar diff --git a/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml b/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml index 015885dd02d32..31386a8d76845 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/i2c/i2c-pxa.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell MMP I2C controller bindings +title: Marvell MMP I2C controller maintainers: - Rob Herring diff --git a/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml b/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml index 42c5974ec7b04..16024415a4a74 100644 --- a/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/i2c/st,nomadik-i2c.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ST Microelectronics Nomadik I2C Bindings +title: ST Microelectronics Nomadik I2C description: The Nomadik I2C host controller began its life in the ST Microelectronics STn8800 SoC, and was then inherited into STn8810 and diff --git a/Documentation/devicetree/bindings/i3c/i3c.yaml b/Documentation/devicetree/bindings/i3c/i3c.yaml index 1f82fc9237996..fdb4212149e79 100644 --- a/Documentation/devicetree/bindings/i3c/i3c.yaml +++ b/Documentation/devicetree/bindings/i3c/i3c.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/i3c/i3c.yaml# $schema: 
http://devicetree.org/meta-schemas/core.yaml# -title: I3C bus binding +title: I3C bus maintainers: - Alexandre Belloni diff --git a/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml b/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml index 698beb896f76a..517e8b1fcb739 100644 --- a/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml @@ -5,7 +5,7 @@ $id: "http://devicetree.org/schemas/iio/adc/ingenic,adc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Ingenic JZ47xx ADC controller IIO bindings +title: Ingenic JZ47xx ADC controller IIO maintainers: - Artur Rojek diff --git a/Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml b/Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml index a6cb857a232da..9ceb6f18c854f 100644 --- a/Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/motorola,cpcap-adc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Motorola CPCAP PMIC ADC binding +title: Motorola CPCAP PMIC ADC maintainers: - Tony Lindgren diff --git a/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml b/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml index 9c59a20a60327..63369ba388e47 100644 --- a/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/nxp,imx8qxp-adc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP IMX8QXP ADC bindings +title: NXP IMX8QXP ADC maintainers: - Cai Huoqing diff --git a/Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml b/Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml index 43abb300fa3d3..70b38038a0805 100644 --- a/Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/nxp,lpc1850-adc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP LPC1850 ADC bindings +title: NXP LPC1850 ADC maintainers: - Jonathan Cameron diff --git a/Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml b/Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml index 44aa28b59197a..8181cf9a8e07d 100644 --- a/Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/sprd,sc2720-adc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Spreadtrum SC27XX series PMICs ADC binding +title: Spreadtrum SC27XX series PMICs ADC maintainers: - Baolin Wang diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml index 05265f381fde9..1c340c95df160 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/iio/adc/st,stm32-adc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: STMicroelectronics STM32 ADC bindings +title: STMicroelectronics STM32 ADC description: | STM32 ADC is a successive approximation analog-to-digital converter. 
diff --git a/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml b/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml index d6d3d85901715..d40689f233f22 100644 --- a/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/x-powers,axp209-adc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: X-Powers AXP ADC bindings +title: X-Powers AXP ADC maintainers: - Chen-Yu Tsai diff --git a/Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml b/Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml index 595f481c548e0..9c8afe3f1b690 100644 --- a/Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml +++ b/Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/dac/nxp,lpc1850-dac.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP LPC1850 DAC bindings +title: NXP LPC1850 DAC maintainers: - Jonathan Cameron diff --git a/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml b/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml index 6adeda4087fc2..0f1bf1110122a 100644 --- a/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml +++ b/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/iio/dac/st,stm32-dac.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: STMicroelectronics STM32 DAC bindings +title: STMicroelectronics STM32 DAC description: | The STM32 DAC is a 12-bit voltage output digital-to-analog converter. The DAC diff --git a/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml b/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml index 611ad4444cf0c..c55831b60ee67 100644 --- a/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml +++ b/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/multiplexer/io-channel-mux.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: I/O channel multiplexer bindings +title: I/O channel multiplexer maintainers: - Peter Rosin diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml index 3225c8d1fdaf0..86a6d18f952a0 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/cypress,cy8ctma140.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Cypress CY8CTMA140 series touchscreen controller bindings +title: Cypress CY8CTMA140 series touchscreen controller maintainers: - Linus Walleij diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml index 762e56ee90cd2..4dfbb93678b56 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/cypress,cy8ctma340.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Cypress CY8CTMA340 series 
touchscreen controller bindings +title: Cypress CY8CTMA340 series touchscreen controller description: The Cypress CY8CTMA340 series (also known as "CYTTSP" after the marketing name Cypress TrueTouch Standard Product) touchscreens can diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml index 46bc8c028fe69..ef4c841387bdd 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/edt-ft5x06.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: FocalTech EDT-FT5x06 Polytouch Bindings +title: FocalTech EDT-FT5x06 Polytouch description: | There are 5 variants of the chip for various touch panel sizes diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml index 19ac9da421df3..3d016b87c8df8 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/goodix.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Goodix GT9xx series touchscreen controller Bindings +title: Goodix GT9xx series touchscreen controller maintainers: - Dmitry Torokhov diff --git a/Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml b/Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml index be2ba185c086b..f42b23d532eb0 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/himax,hx83112b.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Himax hx83112b touchscreen controller bindings +title: Himax hx83112b touchscreen controller maintainers: - Job Noorman diff --git a/Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml b/Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml index 942562f1e45b2..874c0781c4769 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/hycon,hy46xx.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Hycon HY46XX series touchscreen controller bindings +title: Hycon HY46XX series touchscreen controller description: | There are 6 variants of the chip for various touch panel sizes and cover lens material diff --git a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml index e3a2b871e50c6..0d6b033fd5fbc 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/imagis,ist3038c.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Imagis IST30XXC family touchscreen controller bindings +title: Imagis IST30XXC family touchscreen controller maintainers: - Markuss Broks diff --git 
a/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml b/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml index 62366886fb3e1..fdd02898e2492 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/melfas,mms114.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Melfas MMS114 family touchscreen controller bindings +title: Melfas MMS114 family touchscreen controller maintainers: - Linus Walleij diff --git a/Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml b/Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml index af4f954de958d..ddbbc820c7e55 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/mstar,msg2638.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MStar msg2638 touchscreen controller Bindings +title: MStar msg2638 touchscreen controller maintainers: - Vincent Knecht diff --git a/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml index 938aab016cc26..7187c390b2f57 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/ti,tsc2005.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments TSC2004 and TSC2005 touchscreen controller bindings +title: Texas Instruments TSC2004 and TSC2005 touchscreen controller maintainers: - Marek Vasut diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml index 4b5b212c772c4..895592da96263 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/touchscreen.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common touchscreen Bindings +title: Common touchscreen maintainers: - Dmitry Torokhov diff --git a/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml index b4e5ba7c0b49e..b1507463a03e9 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/touchscreen/zinitix,bt400.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Zinitix BT4xx and BT5xx series touchscreen controller bindings +title: Zinitix BT4xx and BT5xx series touchscreen controller description: The Zinitix BT4xx and BT5xx series of touchscreen controllers are Korea-produced touchscreens with embedded microcontrollers. 
The diff --git a/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml index 5a583bf3dbc10..9acc21028413e 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/interrupt-controller/mrvl,intc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell MMP/Orion Interrupt controller bindings +title: Marvell MMP/Orion Interrupt controller maintainers: - Andrew Lunn diff --git a/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml index 9ce6804bdb999..2d6307a383ad4 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/interrupt-controller/nuvoton,wpcm450-aic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Nuvoton WPCM450 Advanced Interrupt Controller bindings +title: Nuvoton WPCM450 Advanced Interrupt Controller maintainers: - Jonathan Neuschäfer diff --git a/Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml b/Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml index 71bc031c4fde6..3f25cdb4e99bd 100644 --- a/Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml +++ b/Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/ipmi/ipmi-ipmb.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: IPMI IPMB device bindings +title: IPMI IPMB device description: IPMI IPMB device bindings diff --git a/Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml b/Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml index 898e3267893ac..c1b4bf95ef99f 100644 --- a/Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml +++ b/Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/ipmi/ipmi-smic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: IPMI device bindings +title: IPMI device description: IPMI device bindings diff --git a/Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml index 3300451fcfd5e..584030b6b0b95 100644 --- a/Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/leds/backlight/gpio-backlight.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: gpio-backlight bindings +title: gpio-backlight maintainers: - Lee Jones diff --git a/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml index 0793d0adc4ec9..d7b78198abc22 100644 --- a/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/leds/backlight/led-backlight.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: led-backlight bindings +title: led-backlight maintainers: - Lee Jones diff --git a/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml 
b/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml index 78fbe20a17580..5ec47a8c6568b 100644 --- a/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/leds/backlight/pwm-backlight.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: pwm-backlight bindings +title: pwm-backlight maintainers: - Lee Jones diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml index f24fd84b4b05c..c71b7c065b825 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/mailbox/qcom,apcs-kpss-global.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Qualcomm APCS global block bindings +title: Qualcomm APCS global block description: This binding describes the APCS "global" block found in various Qualcomm diff --git a/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml index 80feba82cbd61..bdfb4a8220c54 100644 --- a/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/mailbox/sprd-mailbox.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Spreadtrum mailbox controller bindings +title: Spreadtrum mailbox controller maintainers: - Orson Zhai diff --git a/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml index 2c8b47285aa34..0dfe05a04dd00 100644 --- a/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml +++ b/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/mailbox/st,stm32-ipcc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: STMicroelectronics STM32 IPC controller bindings +title: STMicroelectronics STM32 IPC controller description: The IPCC block provides a non blocking signaling mechanism to post and diff --git a/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml b/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml index 0e3478551e137..de3e483f146a0 100644 --- a/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml +++ b/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/media/marvell,mmp2-ccic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell MMP2 camera host interface bindings +title: Marvell MMP2 camera host interface maintainers: - Lubomir Rintel diff --git a/Documentation/devicetree/bindings/media/renesas,ceu.yaml b/Documentation/devicetree/bindings/media/renesas,ceu.yaml index 50e0740af15a7..d527fc42c3fd1 100644 --- a/Documentation/devicetree/bindings/media/renesas,ceu.yaml +++ b/Documentation/devicetree/bindings/media/renesas,ceu.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/renesas,ceu.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Renesas Capture Engine Unit (CEU) Bindings +title: Renesas Capture Engine Unit (CEU) maintainers: - Jacopo Mondi diff --git a/Documentation/devicetree/bindings/media/st,stm32-cec.yaml 
b/Documentation/devicetree/bindings/media/st,stm32-cec.yaml index 77144cc6f7db9..7f545a587a392 100644 --- a/Documentation/devicetree/bindings/media/st,stm32-cec.yaml +++ b/Documentation/devicetree/bindings/media/st,stm32-cec.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/st,stm32-cec.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 CEC bindings +title: STMicroelectronics STM32 CEC maintainers: - Yannick Fertre diff --git a/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml b/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml index e80fcdf280f0b..6b3e413cedb25 100644 --- a/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml +++ b/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/st,stm32-dcmi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Digital Camera Memory Interface (DCMI) binding +title: STMicroelectronics STM32 Digital Camera Memory Interface (DCMI) maintainers: - Hugues Fruchet diff --git a/Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml b/Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml index f97b4a2466056..4afa4a24b8685 100644 --- a/Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml +++ b/Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/media/st,stm32-dma2d.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Chrom-Art Accelerator DMA2D binding +title: STMicroelectronics STM32 Chrom-Art Accelerator DMA2D description: Chrom-ART Accelerator(DMA2D), graphical hardware accelerator diff --git a/Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml b/Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml index 96d563fd61f58..e42aa488704d4 100644 --- a/Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/memory-controllers/calxeda-ddr-ctrlr.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Calxeda DDR memory controller binding +title: Calxeda DDR memory controller description: | The Calxeda DDR memory controller is initialised and programmed by the diff --git a/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml b/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml index d71af02b7f16c..e76ba767dfd22 100644 --- a/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/memory-controllers/st,stm32-fmc2-ebi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics Flexible Memory Controller 2 (FMC2) Bindings +title: STMicroelectronics Flexible Memory Controller 2 (FMC2) description: | The FMC2 functional block makes the interface with: synchronous and diff --git a/Documentation/devicetree/bindings/mfd/actions,atc260x.yaml b/Documentation/devicetree/bindings/mfd/actions,atc260x.yaml index dd43a0c766f34..c3a368a0fe935 100644 --- a/Documentation/devicetree/bindings/mfd/actions,atc260x.yaml +++ b/Documentation/devicetree/bindings/mfd/actions,atc260x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/actions,atc260x.yaml# 
$schema: http://devicetree.org/meta-schemas/core.yaml# -title: Actions Semi ATC260x Power Management IC bindings +title: Actions Semi ATC260x Power Management IC maintainers: - Manivannan Sadhasivam diff --git a/Documentation/devicetree/bindings/mfd/ene-kb3930.yaml b/Documentation/devicetree/bindings/mfd/ene-kb3930.yaml index 08af356f5d275..9b11b6e2bbf74 100644 --- a/Documentation/devicetree/bindings/mfd/ene-kb3930.yaml +++ b/Documentation/devicetree/bindings/mfd/ene-kb3930.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/ene-kb3930.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ENE KB3930 Embedded Controller bindings +title: ENE KB3930 Embedded Controller description: | This binding describes the ENE KB3930 Embedded Controller attached to an diff --git a/Documentation/devicetree/bindings/mfd/ene-kb930.yaml b/Documentation/devicetree/bindings/mfd/ene-kb930.yaml index 06ed9ec8f4bbc..aa29e3f4655e9 100644 --- a/Documentation/devicetree/bindings/mfd/ene-kb930.yaml +++ b/Documentation/devicetree/bindings/mfd/ene-kb930.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/ene-kb930.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ENE KB930 Embedded Controller bindings +title: ENE KB930 Embedded Controller description: | This binding describes the ENE KB930 Embedded Controller attached to an diff --git a/Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml b/Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml index f09577105b507..20067002cc4a3 100644 --- a/Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml +++ b/Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/fsl,imx8qxp-csr.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale i.MX8qm/qxp Control and Status Registers Module Bindings +title: Freescale i.MX8qm/qxp Control and Status Registers Module maintainers: - Liu Ying diff --git a/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml b/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml index ec3138c1bbfc1..e6a2387d8650e 100644 --- a/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml +++ b/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/qcom,pm8008.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies, Inc. PM8008 PMIC bindings +title: Qualcomm Technologies, Inc. 
PM8008 PMIC maintainers: - Guru Das Srinagesh diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml index fbface720678c..5fbb94d2e4fae 100644 --- a/Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml +++ b/Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/rohm,bd71815-pmic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ROHM BD71815 Power Management Integrated Circuit bindings +title: ROHM BD71815 Power Management Integrated Circuit maintainers: - Matti Vaittinen diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml index 8380166d176cd..d15ea8e9acad7 100644 --- a/Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml +++ b/Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/rohm,bd71828-pmic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ROHM BD71828 Power Management Integrated Circuit bindings +title: ROHM BD71828 Power Management Integrated Circuit maintainers: - Matti Vaittinen diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml index 3bfdd33702ad1..4aca765caee24 100644 --- a/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml +++ b/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/rohm,bd71837-pmic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ROHM BD71837 Power Management Integrated Circuit bindings +title: ROHM BD71837 Power Management Integrated Circuit maintainers: - Matti Vaittinen diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml index 5d531051a1537..e6491729715f9 100644 --- a/Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml +++ b/Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/rohm,bd71847-pmic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ROHM BD71847 and BD71850 Power Management Integrated Circuit bindings +title: ROHM BD71847 and BD71850 Power Management Integrated Circuit maintainers: - Matti Vaittinen diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml index 6483860da9554..34ff0a322f3a1 100644 --- a/Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml +++ b/Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/rohm,bd9576-pmic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ROHM BD9576MUF and BD9573MUF Power Management Integrated Circuit bindings +title: ROHM BD9576MUF and BD9573MUF Power Management Integrated Circuit maintainers: - Matti Vaittinen diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml index d950dd5d48bd6..27329c5dc38e6 100644 --- a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/st,stm32-lptimer.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: 
STMicroelectronics STM32 Low-Power Timers bindings +title: STMicroelectronics STM32 Low-Power Timers description: | The STM32 Low-Power Timer (LPTIM) is a 16-bit timer that provides several diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml index e2c3c3b44abba..f84e09a5743b7 100644 --- a/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/st,stm32-timers.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Timers bindings +title: STMicroelectronics STM32 Timers description: | This hardware block provides 3 types of timer along with PWM functionality: diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml index b4d54302582fb..76551c90b128e 100644 --- a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/st,stmfx.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectonics Multi-Function eXpander (STMFX) bindings +title: STMicroelectonics Multi-Function eXpander (STMFX) description: ST Multi-Function eXpander (STMFX) is a slave controller using I2C for communication with the main MCU. Its main features are GPIO expansion, diff --git a/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml b/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml index 426658ad81d46..9573e4af949ee 100644 --- a/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml +++ b/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mfd/st,stpmic1.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectonics STPMIC1 Power Management IC bindings +title: STMicroelectonics STPMIC1 Power Management IC description: STMicroelectronics STPMIC1 Power Management IC diff --git a/Documentation/devicetree/bindings/mips/cpus.yaml b/Documentation/devicetree/bindings/mips/cpus.yaml index e991f4c6668da..cf382dea3922c 100644 --- a/Documentation/devicetree/bindings/mips/cpus.yaml +++ b/Documentation/devicetree/bindings/mips/cpus.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mips/cpus.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MIPS CPUs bindings +title: MIPS CPUs maintainers: - Thomas Bogendoerfer diff --git a/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml b/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml index b3c45c046ba5e..e99342f268a64 100644 --- a/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml +++ b/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/misc/olpc,xo1.75-ec.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: OLPC XO-1.75 Embedded Controller bindings +title: OLPC XO-1.75 Embedded Controller description: | This binding describes the Embedded Controller acting as a SPI bus master diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml b/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml index dead421e17d62..c028039bc477c 100644 --- a/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml +++ b/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml @@ -4,7 +4,7 @@ $id: 
http://devicetree.org/schemas/mmc/brcm,sdhci-brcmstb.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom BRCMSTB/BMIPS SDHCI Controller binding +title: Broadcom BRCMSTB/BMIPS SDHCI Controller maintainers: - Al Cooper diff --git a/Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml b/Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml index 69ff065c9a39a..fa6cfe092fc9b 100644 --- a/Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml +++ b/Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/microchip,dw-sparx5-sdhci.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Microchip Sparx5 Mobile Storage Host Controller Binding +title: Microchip Sparx5 Mobile Storage Host Controller allOf: - $ref: "mmc-controller.yaml" diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml index 1fc7e620f3280..911a5996e0991 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml +++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/mmc-pwrseq-emmc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Simple eMMC hardware reset provider binding +title: Simple eMMC hardware reset provider maintainers: - Ulf Hansson diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml index 9e23967510304..3397dbff88c2e 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml +++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/mmc-pwrseq-sd8787.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell SD8787 power sequence provider binding +title: Marvell SD8787 power sequence provider maintainers: - Ulf Hansson diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml index 226fb191913d2..64e3644eefeb0 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml +++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/mmc-pwrseq-simple.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Simple MMC power sequence provider binding +title: Simple MMC power sequence provider maintainers: - Ulf Hansson diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml index 8ed94a12a03b7..7a649ebc688c8 100644 --- a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml +++ b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/mtk-sd.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MTK MSDC Storage Host Controller Binding +title: MTK MSDC Storage Host Controller maintainers: - Chaotian Jing diff --git a/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml b/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml index 1c87f4218e18c..3d46c25257877 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml +++ b/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/sdhci-pxa.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell PXA SDHCI v2/v3 bindings 
+title: Marvell PXA SDHCI v2/v3 maintainers: - Ulf Hansson diff --git a/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml b/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml index 51ba44cad8426..a43eb837f8dae 100644 --- a/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml +++ b/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/snps,dwcmshc-sdhci.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Synopsys Designware Mobile Storage Host Controller Binding +title: Synopsys Designware Mobile Storage Host Controller maintainers: - Ulf Hansson diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml index e1f5f26f3f1c6..b13b5166d20a8 100644 --- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml +++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mmc/synopsys-dw-mshc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Synopsys Designware Mobile Storage Host Controller Binding +title: Synopsys Designware Mobile Storage Host Controller maintainers: - Ulf Hansson diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml index 849aeae319a92..8487089b6e169 100644 --- a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/gpmi-nand.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale General-Purpose Media Interface (GPMI) binding +title: Freescale General-Purpose Media Interface (GPMI) maintainers: - Han Xu diff --git a/Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml b/Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml index ea9450fe7c9fd..00882892f47eb 100644 --- a/Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml +++ b/Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/mtd/microchip,mchp48l640.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Microchip 48l640 (and similar) serial EERAM bindings +title: Microchip 48l640 (and similar) serial EERAM maintainers: - Heiko Schocher diff --git a/Documentation/devicetree/bindings/mtd/mxc-nand.yaml b/Documentation/devicetree/bindings/mtd/mxc-nand.yaml index 66da1b476ab7e..7f6f7c9596c49 100644 --- a/Documentation/devicetree/bindings/mtd/mxc-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/mxc-nand.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/mxc-nand.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale's mxc_nand binding +title: Freescale's mxc_nand maintainers: - Uwe Kleine-König diff --git a/Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml b/Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml index 61d12bda356e6..1c2b4e780ca9a 100644 --- a/Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml +++ b/Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/partitions/qcom,smem-part.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm SMEM NAND flash partition parser binding +title: Qualcomm SMEM NAND flash partition parser maintainers: - Manivannan Sadhasivam 
diff --git a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml index 8cbfa1504a0f1..19cf1f18b61c3 100644 --- a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mtd/st,stm32-fmc2-nand.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics Flexible Memory Controller 2 (FMC2) Bindings +title: STMicroelectronics Flexible Memory Controller 2 (FMC2) maintainers: - Christophe Kerello diff --git a/Documentation/devicetree/bindings/mux/gpio-mux.yaml b/Documentation/devicetree/bindings/mux/gpio-mux.yaml index ee4de9fbaf4d6..b597c1f2c5772 100644 --- a/Documentation/devicetree/bindings/mux/gpio-mux.yaml +++ b/Documentation/devicetree/bindings/mux/gpio-mux.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mux/gpio-mux.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: GPIO-based multiplexer controller bindings +title: GPIO-based multiplexer controller maintainers: - Peter Rosin diff --git a/Documentation/devicetree/bindings/mux/mux-consumer.yaml b/Documentation/devicetree/bindings/mux/mux-consumer.yaml index d3d854967359d..9e2d78a78e409 100644 --- a/Documentation/devicetree/bindings/mux/mux-consumer.yaml +++ b/Documentation/devicetree/bindings/mux/mux-consumer.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mux/mux-consumer.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common multiplexer controller consumer bindings +title: Common multiplexer controller consumer maintainers: - Peter Rosin diff --git a/Documentation/devicetree/bindings/mux/mux-controller.yaml b/Documentation/devicetree/bindings/mux/mux-controller.yaml index c855fbad38844..8b943082a2416 100644 --- a/Documentation/devicetree/bindings/mux/mux-controller.yaml +++ b/Documentation/devicetree/bindings/mux/mux-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mux/mux-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common multiplexer controller provider bindings +title: Common multiplexer controller provider maintainers: - Peter Rosin diff --git a/Documentation/devicetree/bindings/mux/reg-mux.yaml b/Documentation/devicetree/bindings/mux/reg-mux.yaml index dfd9ea582bb7a..dc4be092fc2fc 100644 --- a/Documentation/devicetree/bindings/mux/reg-mux.yaml +++ b/Documentation/devicetree/bindings/mux/reg-mux.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/mux/reg-mux.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Generic register bitfield-based multiplexer controller bindings +title: Generic register bitfield-based multiplexer controller maintainers: - Peter Rosin diff --git a/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml b/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml index e5af53508e254..c99034f053e82 100644 --- a/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml +++ b/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/brcm,bcmgenet.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom BCM7xxx Ethernet Controller (GENET) binding +title: Broadcom BCM7xxx Ethernet Controller (GENET) maintainers: - Doug Berger diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml index 26aa0830eea1a..67879aab623b5 
100644 --- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/can/bosch,m_can.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bosch MCAN controller Bindings +title: Bosch MCAN controller description: Bosch MCAN controller for CAN bus diff --git a/Documentation/devicetree/bindings/net/can/can-transceiver.yaml b/Documentation/devicetree/bindings/net/can/can-transceiver.yaml index d1ef1fe6ab290..d422b3921ffa5 100644 --- a/Documentation/devicetree/bindings/net/can/can-transceiver.yaml +++ b/Documentation/devicetree/bindings/net/can/can-transceiver.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/can/can-transceiver.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: CAN transceiver Bindings +title: CAN transceiver description: CAN transceiver generic properties bindings diff --git a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml index a6921e805e37e..4116667133ce9 100644 --- a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml +++ b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/engleder,tsnep.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: TSN endpoint Ethernet MAC binding +title: TSN endpoint Ethernet MAC maintainers: - Gerhard Engleder diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml index 600240281e8c5..6e0763898d3a8 100644 --- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml +++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/fsl,qoriq-mc-dpmac.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: DPAA2 MAC bindings +title: DPAA2 MAC maintainers: - Ioana Ciornei diff --git a/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml b/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml index afd11c9422fa2..8438af53c5c3d 100644 --- a/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml +++ b/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/mctp-i2c-controller.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MCTP I2C transport binding +title: MCTP I2C transport maintainers: - Matt Johnston diff --git a/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml b/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml index d58e1571df9b4..e68ed9423150e 100644 --- a/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml +++ b/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml @@ -6,7 +6,7 @@ $id: http://devicetree.org/schemas/net/wireless/ieee80211.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common IEEE 802.11 Binding +title: Common IEEE 802.11 maintainers: - Lorenzo Bianconi diff --git a/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml b/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml index bf84768228f56..fe2cd7f1afba9 100644 --- a/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml +++ b/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/nvmem/ingenic,jz4780-efuse.yaml# $schema: 
http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic JZ EFUSE driver bindings +title: Ingenic JZ EFUSE driver maintainers: - PrasannaKumar Muralidharan diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index 2eab2f46cb65a..8e89b15b535fa 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/nvmem/qcom,qfprom.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies Inc, QFPROM Efuse bindings +title: Qualcomm Technologies Inc, QFPROM Efuse maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml b/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml index 2578e39deda90..73a0c658dbfd0 100644 --- a/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml +++ b/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/nvmem/socionext,uniphier-efuse.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Socionext UniPhier eFuse bindings +title: Socionext UniPhier eFuse maintainers: - Keiji Hayashibara diff --git a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml index 16f4cad2fa551..172597cc5c63b 100644 --- a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml +++ b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/nvmem/st,stm32-romem.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Factory-programmed data bindings +title: STMicroelectronics STM32 Factory-programmed data description: | This represents STM32 Factory-programmed read only non-volatile area: locked diff --git a/Documentation/devicetree/bindings/opp/opp-v1.yaml b/Documentation/devicetree/bindings/opp/opp-v1.yaml index d585d536a3fb4..07e26c2678153 100644 --- a/Documentation/devicetree/bindings/opp/opp-v1.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v1.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/opp/opp-v1.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Generic OPP (Operating Performance Points) v1 Bindings +title: Generic OPP (Operating Performance Points) v1 maintainers: - Viresh Kumar diff --git a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml index a202b6c6561d8..60cf3cbde4c5b 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/opp/opp-v2-kryo-cpu.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies, Inc. NVMEM OPP bindings +title: Qualcomm Technologies, Inc. 
NVMEM OPP maintainers: - Ilia Lin diff --git a/Documentation/devicetree/bindings/opp/opp-v2.yaml b/Documentation/devicetree/bindings/opp/opp-v2.yaml index 2f05920fce79a..6972d76233aa4 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/opp/opp-v2.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Generic OPP (Operating Performance Points) Bindings +title: Generic OPP (Operating Performance Points) maintainers: - Viresh Kumar diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml index 977c976ea7994..8d7eb51edcb45 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/pci/qcom,pcie-ep.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm PCIe Endpoint Controller binding +title: Qualcomm PCIe Endpoint Controller maintainers: - Manivannan Sadhasivam diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml index 5ba9570ad7bf2..e6f9f5540cc3f 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/fsl,imx8mq-usb-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale i.MX8MQ USB3 PHY binding +title: Freescale i.MX8MQ USB3 PHY maintainers: - Li Jun diff --git a/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml b/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml index 4d91e2f4f247d..ff9f9ca0f19cc 100644 --- a/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/fsl,lynx-28g.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale Lynx 28G SerDes PHY binding +title: Freescale Lynx 28G SerDes PHY maintainers: - Ioana Ciornei diff --git a/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml b/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml index 52815b6c2b88d..5cee4c85ff8bb 100644 --- a/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml +++ b/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/intel,keembay-phy-usb.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Intel Keem Bay USB PHY bindings +title: Intel Keem Bay USB PHY maintainers: - Wan Ahmad Zainie diff --git a/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml b/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml index b09e5ba5e127d..361ffc35b16bf 100644 --- a/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml +++ b/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/intel,phy-thunderbay-emmc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Intel Thunder Bay eMMC PHY bindings +title: Intel Thunder Bay eMMC PHY maintainers: - Srikandan Nandhini diff --git a/Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml b/Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml index c97043eaa8fbe..be13113f7b47f 
100644 --- a/Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/phy/marvell,mmp3-usb-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell MMP3 USB PHY bindings +title: Marvell MMP3 USB PHY maintainers: - Lubomir Rintel diff --git a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml index 9c2a7345955d4..26f2b887cfc1e 100644 --- a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml +++ b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/phy/mediatek,dsi-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MediaTek MIPI Display Serial Interface (DSI) PHY binding +title: MediaTek MIPI Display Serial Interface (DSI) PHY maintainers: - Chun-Kuang Hu diff --git a/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml index 0d94950b84ca3..6cfdaadec0859 100644 --- a/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml +++ b/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/phy/mediatek,hdmi-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MediaTek High Definition Multimedia Interface (HDMI) PHY binding +title: MediaTek High Definition Multimedia Interface (HDMI) PHY maintainers: - Chun-Kuang Hu diff --git a/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml index 74cc32c1d2e85..3e62b5d4da616 100644 --- a/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/phy/mediatek,ufs-phy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: MediaTek Universal Flash Storage (UFS) M-PHY binding +title: MediaTek Universal Flash Storage (UFS) M-PHY maintainers: - Stanley Chu diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml index a9e227d8b076a..6a09472740ed9 100644 --- a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml +++ b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/phy/phy-cadence-sierra.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Cadence Sierra PHY binding +title: Cadence Sierra PHY description: This binding describes the Cadence Sierra PHY. 
Sierra PHY supports multilink diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml index 2fec9e54ad0e6..2ad1faadda2a2 100644 --- a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml +++ b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/phy/phy-cadence-torrent.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Cadence Torrent SD0801 PHY binding +title: Cadence Torrent SD0801 PHY description: This binding describes the Cadence SD0801 PHY (also known as Torrent PHY) diff --git a/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml b/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml index 801993813b185..5b4c915cc9e56 100644 --- a/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml +++ b/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/phy-stm32-usbphyc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 USB HS PHY controller binding +title: STMicroelectronics STM32 USB HS PHY controller description: diff --git a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml index 4dc5205d893ba..445b2467f4f6b 100644 --- a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml +++ b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/phy/phy-tegra194-p2u.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: NVIDIA Tegra194 & Tegra234 P2U binding +title: NVIDIA Tegra194 & Tegra234 P2U maintainers: - Thierry Reding diff --git a/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml b/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml index 62dcb84c08aa0..738c92bb75187 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/ti,phy-am654-serdes.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: TI AM654 SERDES binding +title: TI AM654 SERDES description: This binding describes the TI AM654 SERDES. 
AM654 SERDES can be configured diff --git a/Documentation/devicetree/bindings/phy/transmit-amplitude.yaml b/Documentation/devicetree/bindings/phy/transmit-amplitude.yaml index 51492fe738ec9..617f3c0b3dfb6 100644 --- a/Documentation/devicetree/bindings/phy/transmit-amplitude.yaml +++ b/Documentation/devicetree/bindings/phy/transmit-amplitude.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/transmit-amplitude.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common PHY and network PCS transmit amplitude property binding +title: Common PHY and network PCS transmit amplitude property description: Binding describing the peak-to-peak transmit amplitude for common PHYs diff --git a/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml b/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml index b425483501886..ca0fef6e535e2 100644 --- a/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml +++ b/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/pinctrl/intel,lgm-io.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Intel Lightning Mountain SoC pinmux & GPIO controller binding +title: Intel Lightning Mountain SoC pinmux & GPIO controller maintainers: - Rahul Tanwar diff --git a/Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml b/Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml index 301db7daf8706..2fd2178d1fa55 100644 --- a/Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml +++ b/Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/avs/qcom,cpr.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Core Power Reduction (CPR) bindings +title: Qualcomm Core Power Reduction (CPR) maintainers: - Niklas Cassel diff --git a/Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml b/Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml index b289388952bf2..85bebebb285bd 100644 --- a/Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/dlg,da9150-charger.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Dialog Semiconductor DA9150 Charger Power Supply bindings +title: Dialog Semiconductor DA9150 Charger Power Supply maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml b/Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml index d47caf59d2046..7cc94b8729379 100644 --- a/Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml +++ b/Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/dlg,da9150-fuel-gauge.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Dialog Semiconductor DA9150 Fuel-Gauge Power Supply bindings +title: Dialog Semiconductor DA9150 Fuel-Gauge Power Supply maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml index 46527038bf227..534388b255a98 100644 --- a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml +++ b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml @@ -5,7 +5,7 @@ $id: 
http://devicetree.org/schemas/power/supply/ingenic,battery.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Ingenic JZ47xx battery bindings +title: Ingenic JZ47xx battery maintainers: - Artur Rojek diff --git a/Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml b/Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml index 76cedf95a12c9..d26ed5eabe28c 100644 --- a/Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/lltc,lt3651-charger.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Analog Devices LT3651 Charger Power Supply bindings +title: Analog Devices LT3651 Charger Power Supply maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml index eeb043f9bb4f2..a7344815ce995 100644 --- a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml +++ b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/sc2731-charger.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Spreadtrum SC2731 PMICs battery charger binding +title: Spreadtrum SC2731 PMICs battery charger maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml b/Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml index d90a838a17441..de43e45a43b7c 100644 --- a/Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml +++ b/Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/sc27xx-fg.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Spreadtrum SC27XX PMICs Fuel Gauge Unit Power Supply Bindings +title: Spreadtrum SC27XX PMICs Fuel Gauge Unit Power Supply maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml index cd8e9a8907f84..70d563d44c356 100644 --- a/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml +++ b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/pwm/microchip,corepwm.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Microchip IP corePWM controller bindings +title: Microchip IP corePWM controller maintainers: - Conor Dooley diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml b/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml index 38bdaef4fa397..c82f6f885d97d 100644 --- a/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml +++ b/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/regulator/st,stm32-booster.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 booster for ADC analog input switches bindings +title: STMicroelectronics STM32 booster for ADC analog input switches maintainers: - Fabrice Gasnier diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml index a5a27ee0a9e67..c1bf1f90490a0 100644 --- 
a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml +++ b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/regulator/st,stm32-vrefbuf.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Voltage reference buffer bindings +title: STMicroelectronics STM32 Voltage reference buffer description: | Some STM32 devices embed a voltage reference buffer which can be used as diff --git a/Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml b/Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml index d892d29a656b3..11cb42a3fdd1a 100644 --- a/Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/remoteproc/amlogic,meson-mx-ao-arc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Amlogic Meson AO ARC Remote Processor bindings +title: Amlogic Meson AO ARC Remote Processor description: Amlogic Meson6, Meson8, Meson8b and Meson8m2 SoCs embed an ARC core diff --git a/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml index 3a1f59ad79e23..e732255b90196 100644 --- a/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/remoteproc/fsl,imx-rproc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: NXP i.MX Co-Processor Bindings +title: NXP i.MX Co-Processor description: This binding provides support for ARM Cortex M4 Co-processor found on some NXP iMX SoCs. 
diff --git a/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml b/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml index aaaaabad46ea6..85b1e43cab08d 100644 --- a/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml +++ b/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/remoteproc/ingenic,vpu.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Ingenic Video Processing Unit bindings +title: Ingenic Video Processing Unit description: Inside the Video Processing Unit (VPU) of the recent JZ47xx SoCs from diff --git a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml index 7e091eaffc184..895415772d1d6 100644 --- a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/remoteproc/mtk,scp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Mediatek SCP Bindings +title: Mediatek SCP maintainers: - Tinghan Shen diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml index db9e0f0c2bea0..c1d9cbc359b4e 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/remoteproc/qcom,adsp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm ADSP Peripheral Image Loader binding +title: Qualcomm ADSP Peripheral Image Loader maintainers: - Manivannan Sadhasivam diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml index a7711e3c998cc..22219d16df204 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/remoteproc/qcom,pil-info.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm peripheral image loader relocation info binding +title: Qualcomm peripheral image loader relocation info maintainers: - Bjorn Andersson diff --git a/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml index a7d25fa920e56..7e0275d31a3c2 100644 --- a/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/remoteproc/renesas,rcar-rproc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Renesas R-Car remote processor controller bindings +title: Renesas R-Car remote processor controller maintainers: - Julien Massot diff --git a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml index da50f0e99fe2c..66b1e3efdaa3f 100644 --- a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/remoteproc/st,stm32-rproc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: STMicroelectronics STM32 remote processor controller bindings +title: STMicroelectronics STM32 remote processor 
controller description: This document defines the binding for the remoteproc component that loads and diff --git a/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml b/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml index 067e71e8ebe8c..9f7590ce6b3d6 100644 --- a/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml +++ b/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rng/intel,ixp46x-rng.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Intel IXP46x RNG bindings +title: Intel IXP46x RNG description: | The Intel IXP46x has a random number generator at a fixed physical diff --git a/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml b/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml index 48ab82abf50ec..4673d6160ad9c 100644 --- a/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml +++ b/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rng/silex-insight,ba431-rng.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Silex Insight BA431 RNG bindings +title: Silex Insight BA431 RNG description: | The BA431 hardware random number generator is an IP that is FIPS-140-2/3 diff --git a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml index fcd86f822a9ce..187b172d0cca5 100644 --- a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml +++ b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rng/st,stm32-rng.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 RNG bindings +title: STMicroelectronics STM32 RNG description: | The STM32 hardware random number generator is a simple fixed purpose diff --git a/Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml b/Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml index 1e17e55762f17..d831322911705 100644 --- a/Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml +++ b/Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rng/xiphera,xip8001b-trng.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Xiphera XIP8001B-trng bindings +title: Xiphera XIP8001B-trng maintainers: - Atte Tommiska diff --git a/Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml b/Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml index 482e5af215b3d..b04b87ef6f334 100644 --- a/Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rtc/sa1100-rtc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell Real Time Clock controller bindings +title: Marvell Real Time Clock controller allOf: - $ref: rtc.yaml# diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml index 764717ce18733..9e66ed33cda48 100644 --- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rtc/st,stm32-rtc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Real Time Clock Bindings +title: STMicroelectronics STM32 Real Time Clock maintainers: - 
Gabriel Fernandez diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index 6258f5f59b195..34b8e59aa9d4d 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/serial/8250.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: UART (Universal Asynchronous Receiver/Transmitter) bindings +title: UART (Universal Asynchronous Receiver/Transmitter) maintainers: - devicetree@vger.kernel.org diff --git a/Documentation/devicetree/bindings/serial/rs485.yaml b/Documentation/devicetree/bindings/serial/rs485.yaml index 90a1bab40f050..789763cf427ac 100644 --- a/Documentation/devicetree/bindings/serial/rs485.yaml +++ b/Documentation/devicetree/bindings/serial/rs485.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/serial/rs485.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: RS485 serial communications Bindings +title: RS485 serial communications description: The RTS signal is capable of automatically controlling line direction for the built-in half-duplex mode. The properties described diff --git a/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml b/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml index 333dc42722d25..85876c668f6d0 100644 --- a/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml +++ b/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# maintainers: - Erwan Le Ray -title: STMicroelectronics STM32 USART bindings +title: STMicroelectronics STM32 USART properties: compatible: diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml index 98d087cf4fc03..ab607efbb64c1 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/soc/qcom/qcom,aoss-qmp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Always-On Subsystem side channel binding +title: Qualcomm Always-On Subsystem side channel maintainers: - Bjorn Andersson diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml index a6bc3197d5ddc..6026c21736d88 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/soc/qcom/qcom,apr.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Qualcomm APR/GPR (Asynchronous/Generic Packet Router) binding +title: Qualcomm APR/GPR (Asynchronous/Generic Packet Router) maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml index 4149cf2b66be3..497614ddf0058 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/soc/qcom/qcom,smem.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Qualcomm Shared Memory Manager binding +title: Qualcomm Shared Memory Manager maintainers: - Andy Gross diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml 
b/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml index 38818c37c3ea8..aca3d40bcccbf 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/soc/qcom/qcom,spm.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Qualcomm Subsystem Power Manager binding +title: Qualcomm Subsystem Power Manager maintainers: - Andy Gross diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml index 48eda4d0d391d..7ab8cfff18c19 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/soc/qcom/qcom-stats.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies, Inc. (QTI) Stats bindings +title: Qualcomm Technologies, Inc. (QTI) Stats maintainers: - Maulik Shah diff --git a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml index 60b49562ff694..a6836904a4f83 100644 --- a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml +++ b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/soc/samsung/exynos-usi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Samsung's Exynos USI (Universal Serial Interface) binding +title: Samsung's Exynos USI (Universal Serial Interface) maintainers: - Sam Protsenko diff --git a/Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml b/Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml index 92d896e0d3238..f302fe89a2536 100644 --- a/Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml +++ b/Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/marvell,mmp-sspa.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvel SSPA Digital Audio Interface Bindings +title: Marvel SSPA Digital Audio Interface maintainers: - Lubomir Rintel diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml index 5e26b3e9db2cd..bb42220916b35 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,lpass-cpu.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Technologies Inc. LPASS CPU dai driver bindings +title: Qualcomm Technologies Inc. 
LPASS CPU dai driver maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml b/Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml index 73a4afad5a743..a53c9ef938fa1 100644 --- a/Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/sound/qcom,q6apm-dai.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Qualcomm Audio Process Manager Digital Audio Interfaces binding +title: Qualcomm Audio Process Manager Digital Audio Interfaces maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml b/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml index aa6c0ecba5cfc..1168410f6fbd7 100644 --- a/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/sound/qcom,q6dsp-lpass-clocks.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Qualcomm DSP LPASS Clock Controller binding +title: Qualcomm DSP LPASS Clock Controller maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml b/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml index d8ebf2e528d27..d06f188030a3a 100644 --- a/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/sound/qcom,q6dsp-lpass-ports.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Qualcomm DSP LPASS(Low Power Audio SubSystem) Audio Ports binding +title: Qualcomm DSP LPASS(Low Power Audio SubSystem) Audio Ports maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml b/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml index fa8f4ac209855..e6c817de34497 100644 --- a/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml +++ b/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/spi/aspeed,ast2600-fmc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Aspeed SMC controllers bindings +title: Aspeed SMC controllers maintainers: - Chin-Ting Kuo diff --git a/Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml b/Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml index 0abcac385e7cb..5f4f6b5615d06 100644 --- a/Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml +++ b/Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/spi/marvell,mmp2-ssp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: PXA2xx SSP SPI Controller bindings +title: PXA2xx SSP SPI Controller maintainers: - Lubomir Rintel diff --git a/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml index 6ec6f556182f4..1eb17f7a4d862 100644 --- a/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml +++ b/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/spi/st,stm32-qspi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 Quad Serial Peripheral Interface 
(QSPI) bindings +title: STMicroelectronics STM32 Quad Serial Peripheral Interface (QSPI) maintainers: - Christophe Kerello diff --git a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml index 3d64bed266ac7..1cda15f91cc30 100644 --- a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml +++ b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/spi/st,stm32-spi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 SPI Controller bindings +title: STMicroelectronics STM32 SPI Controller description: | The STM32 SPI controller is used to communicate with external devices using diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml index 16b57f57d1032..b22c8b59d5c7c 100644 --- a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/thermal/imx-thermal.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP i.MX Thermal Binding +title: NXP i.MX Thermal maintainers: - Shawn Guo diff --git a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml index b90726229ac9c..d2c1e4573c327 100644 --- a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/thermal/imx8mm-thermal.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: NXP i.MX8M Mini Thermal Binding +title: NXP i.MX8M Mini Thermal maintainers: - Anson Huang diff --git a/Documentation/devicetree/bindings/thermal/sprd-thermal.yaml b/Documentation/devicetree/bindings/thermal/sprd-thermal.yaml index 6d65a3cf2af25..76aaa004c8ac5 100644 --- a/Documentation/devicetree/bindings/thermal/sprd-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/sprd-thermal.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/thermal/sprd-thermal.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Spreadtrum thermal sensor controller bindings +title: Spreadtrum thermal sensor controller maintainers: - Orson Zhai diff --git a/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml b/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml index bee41cff51429..ab043084f6678 100644 --- a/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/thermal/st,stm32-thermal.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 digital thermal sensor (DTS) binding +title: STMicroelectronics STM32 digital thermal sensor (DTS) maintainers: - Pascal Paillet diff --git a/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml b/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml index 7bb9327caa13c..b9022f1613d81 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/thermal/thermal-cooling-devices.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Thermal cooling device binding +title: Thermal 
cooling device maintainers: - Amit Kucheria diff --git a/Documentation/devicetree/bindings/thermal/thermal-idle.yaml b/Documentation/devicetree/bindings/thermal/thermal-idle.yaml index 0fd6d9ae61965..1b77d542a7b8d 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-idle.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-idle.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/thermal/thermal-idle.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Thermal idle cooling device binding +title: Thermal idle cooling device maintainers: - Daniel Lezcano diff --git a/Documentation/devicetree/bindings/thermal/thermal-sensor.yaml b/Documentation/devicetree/bindings/thermal/thermal-sensor.yaml index 4bd345c71eb83..57565b3fb07c1 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-sensor.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-sensor.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/thermal/thermal-sensor.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Thermal sensor binding +title: Thermal sensor maintainers: - Amit Kucheria diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml index 8d2c6d74b605a..8581821fa4e17 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/thermal/thermal-zones.yaml# $schema: http://devicetree.org/meta-schemas/base.yaml# -title: Thermal zone binding +title: Thermal zone maintainers: - Amit Kucheria diff --git a/Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml index ea14de80ec759..7ed0abe9290f6 100644 --- a/Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/thermal/ti,am654-thermal.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments AM654 VTM (DTS) binding +title: Texas Instruments AM654 VTM (DTS) maintainers: - Keerthy diff --git a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml index 0509c9cec224d..171b3622ed847 100644 --- a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/thermal/ti,j72xx-thermal.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments J72XX VTM (DTS) binding +title: Texas Instruments J72XX VTM (DTS) maintainers: - Keerthy diff --git a/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml b/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml index 1fbc260a0cbdf..1ee4aab695d38 100644 --- a/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml +++ b/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/timer/mrvl,mmp-timer.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell MMP Timer bindings +title: Marvell MMP Timer maintainers: - Daniel Lezcano diff --git a/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml b/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml index 937aa8a563660..9ec11537620ac 100644 --- 
a/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml +++ b/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/timer/st,stm32-timer.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: STMicroelectronics STM32 general-purpose 16 and 32 bits timers bindings +title: STMicroelectronics STM32 general-purpose 16 and 32 bits timers maintainers: - Fabrice Gasnier diff --git a/Documentation/devicetree/bindings/usb/analogix,anx7411.yaml b/Documentation/devicetree/bindings/usb/analogix,anx7411.yaml index 0e72c08e65665..e4d893369d57b 100644 --- a/Documentation/devicetree/bindings/usb/analogix,anx7411.yaml +++ b/Documentation/devicetree/bindings/usb/analogix,anx7411.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/usb/analogix,anx7411.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Analogix ANX7411 Type-C controller bindings +title: Analogix ANX7411 Type-C controller maintainers: - Xin Ji diff --git a/Documentation/devicetree/bindings/usb/cdns,usb3.yaml b/Documentation/devicetree/bindings/usb/cdns,usb3.yaml index dc9d6ed0781d2..cae46c4982adf 100644 --- a/Documentation/devicetree/bindings/usb/cdns,usb3.yaml +++ b/Documentation/devicetree/bindings/usb/cdns,usb3.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/usb/cdns,usb3.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Cadence USBSS-DRD controller bindings +title: Cadence USBSS-DRD controller maintainers: - Pawel Laszczak diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml index 1ab85489a3f8f..371ba93f3ce58 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.yaml +++ b/Documentation/devicetree/bindings/usb/dwc2.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/usb/dwc2.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: DesignWare HS OTG USB 2.0 controller Bindings +title: DesignWare HS OTG USB 2.0 controller maintainers: - Rob Herring diff --git a/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml b/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml index c69bbfbcf733d..84b3b69256b1e 100644 --- a/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml +++ b/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/usb/faraday,fotg210.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Faraday Technology FOTG210 HS OTG USB 2.0 controller Bindings +title: Faraday Technology FOTG210 HS OTG USB 2.0 controller maintainers: - Linus Walleij diff --git a/Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml b/Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml index 3cf93dd45eb70..a0246aa1f2360 100644 --- a/Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml +++ b/Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/usb/marvell,pxau2o-ehci.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Marvell PXA/MMP EHCI bindings +title: Marvell PXA/MMP EHCI maintainers: - Lubomir Rintel diff --git a/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml b/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml index f238848ad094d..e2743a4b95208 100644 --- a/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml +++ b/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/usb/nxp,isp1760.yaml# $schema: 
http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP ISP1760 family controller bindings
+title: NXP ISP1760 family controller
 
 maintainers:
   - Sebastian Siewior
diff --git a/Documentation/devicetree/bindings/usb/richtek,rt1719.yaml b/Documentation/devicetree/bindings/usb/richtek,rt1719.yaml
index 65a93f7738d5d..e3e87e4d32925 100644
--- a/Documentation/devicetree/bindings/usb/richtek,rt1719.yaml
+++ b/Documentation/devicetree/bindings/usb/richtek,rt1719.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/richtek,rt1719.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Richtek RT1719 sink-only Type-C PD controller bindings
+title: Richtek RT1719 sink-only Type-C PD controller
 
 maintainers:
   - ChiYuan Huang
diff --git a/Documentation/devicetree/bindings/usb/st,stusb160x.yaml b/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
index b8974807b666d..ffcd9897ea38d 100644
--- a/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
+++ b/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/st,stusb160x.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: STMicroelectronics STUSB160x Type-C controller bindings
+title: STMicroelectronics STUSB160x Type-C controller
 
 maintainers:
   - Amelie Delaunay
diff --git a/Documentation/devicetree/bindings/virtio/virtio-device.yaml b/Documentation/devicetree/bindings/virtio/virtio-device.yaml
index 1778ea9b5aa50..8c6919ba9497e 100644
--- a/Documentation/devicetree/bindings/virtio/virtio-device.yaml
+++ b/Documentation/devicetree/bindings/virtio/virtio-device.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/virtio/virtio-device.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Virtio device bindings
+title: Virtio device
 
 maintainers:
   - Viresh Kumar
diff --git a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
index 39736449ba646..a8e266f80c20e 100644
--- a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
+++ b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/watchdog/st,stm32-iwdg.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Independent WatchDoG (IWDG) bindings
+title: STMicroelectronics STM32 Independent WatchDoG (IWDG)
 
 maintainers:
   - Yannick Fertre
-- 
GitLab

From 9d69d47fd399137d41b744065aab2f9677ccc377 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski
Date: Fri, 16 Dec 2022 17:38:13 +0100
Subject: [PATCH 675/694] dt-bindings: drop redundant part of title (beginning)

The Devicetree bindings document does not have to say in the title that
it is a "Devicetree binding", but instead just describe the hardware.
Drop beginning "Devicetree bindings" in various forms:

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [dD]evice[ -]\?[tT]ree [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [dD][tT] [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2/' {} \;

Signed-off-by: Krzysztof Kozlowski
Acked-by: Alexandre Belloni
Reviewed-by: Jonathan Cameron
Acked-by: Ulf Hansson # MMC
Acked-by: Stephen Boyd # clk
Acked-by: Dmitry Torokhov # input
Acked-by: Mark Brown
Acked-by: Sebastian Reichel # power
Link: https://lore.kernel.org/r/20221216163815.522628-8-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring
---
 Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml | 2 +-
 Documentation/devicetree/bindings/clock/fixed-clock.yaml | 2 +-
 Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml | 2 +-
 Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml | 2 +-
 Documentation/devicetree/bindings/clock/idt,versaclock5.yaml | 2 +-
 Documentation/devicetree/bindings/clock/renesas,9series.yaml | 2 +-
 Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml | 2 +-
 .../devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml | 2 +-
 .../devicetree/bindings/display/bridge/intel,keembay-dsi.yaml | 2 +-
 .../devicetree/bindings/display/intel,keembay-display.yaml | 2 +-
 .../devicetree/bindings/display/intel,keembay-msscam.yaml | 2 +-
 Documentation/devicetree/bindings/display/msm/gmu.yaml | 2 +-
 Documentation/devicetree/bindings/display/msm/gpu.yaml | 2 +-
 .../devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml | 2 +-
 .../devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml | 2 +-
 .../devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml | 2 +-
 .../devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml | 2 +-
 .../devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml | 2 +-
 Documentation/devicetree/bindings/i2c/i2c-gpio.yaml | 2 +-
 Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml | 2 +-
 .../devicetree/bindings/iio/adc/sigma-delta-modulator.yaml | 2 +-
 Documentation/devicetree/bindings/input/gpio-keys.yaml | 2 +-
 Documentation/devicetree/bindings/input/microchip,cap11xx.yaml | 2 +-
 .../devicetree/bindings/interrupt-controller/renesas,irqc.yaml | 2 +-
 Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml | 2 +-
 Documentation/devicetree/bindings/leds/register-bit-led.yaml | 2 +-
 Documentation/devicetree/bindings/leds/regulator-led.yaml | 2 +-
 Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml | 2 +-
 Documentation/devicetree/bindings/net/ingenic,mac.yaml | 2 +-
 Documentation/devicetree/bindings/power/supply/bq2415x.yaml | 2 +-
 Documentation/devicetree/bindings/power/supply/bq24190.yaml | 2 +-
 Documentation/devicetree/bindings/power/supply/bq24257.yaml | 2 +-
 Documentation/devicetree/bindings/power/supply/bq24735.yaml | 2 +-
 Documentation/devicetree/bindings/power/supply/bq25890.yaml | 2 +-
 Documentation/devicetree/bindings/power/supply/isp1704.yaml | 2 +-
 .../devicetree/bindings/power/supply/lltc,ltc294x.yaml | 2 +-
 .../devicetree/bindings/power/supply/richtek,rt9455.yaml | 2 +-
 Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml | 2 +-
 Documentation/devicetree/bindings/regulator/pwm-regulator.yaml | 2 +-
 Documentation/devicetree/bindings/rng/ingenic,rng.yaml | 2 +-
 Documentation/devicetree/bindings/rng/ingenic,trng.yaml | 2 +-
 Documentation/devicetree/bindings/serial/8250_omap.yaml | 2 +-
 Documentation/devicetree/bindings/serio/ps2-gpio.yaml | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml | 2 +-
 Documentation/devicetree/bindings/timer/ingenic,sysost.yaml | 2 +-
 Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml | 2 +-
 Documentation/devicetree/bindings/usb/realtek,rts5411.yaml | 2 +-
 Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml | 2 +-
 Documentation/devicetree/bindings/usb/ti,usb8041.yaml | 2 +-
 53 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
index 983033fe5b177..5e942bccf2778 100644
--- a/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
+++ b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/adi,axi-clkgen.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for Analog Devices AXI clkgen pcore clock generator
+title: Analog Devices AXI clkgen pcore clock generator
 
 maintainers:
   - Lars-Peter Clausen
diff --git a/Documentation/devicetree/bindings/clock/fixed-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-clock.yaml
index b657ecd0ef1c3..b0a4fb8256e2f 100644
--- a/Documentation/devicetree/bindings/clock/fixed-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/fixed-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fixed-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for simple fixed-rate clock sources
+title: Simple fixed-rate clock sources
 
 maintainers:
   - Michael Turquette
diff --git a/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml
index 0b02378a3a0c8..8f71ab3004706 100644
--- a/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fixed-factor-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for simple fixed factor rate clock sources
+title: Simple fixed factor rate clock sources
 
 maintainers:
   - Michael Turquette
diff --git a/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
index 1453ac849a659..e22fc272d0231 100644
--- a/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fixed-mmio-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for simple memory mapped IO fixed-rate clock sources
+title: Simple memory mapped IO fixed-rate clock sources
 
 description:
   This binding describes a fixed-rate clock for which the frequency can
diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
index f9ba9864d8b52..61b246cf5e72a 100644
--- a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
+++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/idt,versaclock5.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for IDT VersaClock 5 and 6 programmable I2C clock generators
+title: IDT VersaClock 5 and 6 programmable I2C clock generators
 
 description: |
   The IDT VersaClock 5 and VersaClock 6 are programmable I2C
diff --git 
a/Documentation/devicetree/bindings/clock/renesas,9series.yaml b/Documentation/devicetree/bindings/clock/renesas,9series.yaml index 102eb95cb3fcd..6b6cec3fba528 100644 --- a/Documentation/devicetree/bindings/clock/renesas,9series.yaml +++ b/Documentation/devicetree/bindings/clock/renesas,9series.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/renesas,9series.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for Renesas 9-series I2C PCIe clock generators +title: Renesas 9-series I2C PCIe clock generators description: | The Renesas 9-series are I2C PCIe clock generators providing diff --git a/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml b/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml index c56f911fff475..d525f96cf244b 100644 --- a/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml +++ b/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/ti/ti,clksel.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for TI clksel clock +title: TI clksel clock maintainers: - Tony Lindgren diff --git a/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml b/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml index 89490fdffeb07..0b27df429bdce 100644 --- a/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/bridge/ingenic,jz4780-hdmi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for Ingenic JZ4780 HDMI Transmitter +title: Ingenic JZ4780 HDMI Transmitter maintainers: - H. Nikolaus Schaller diff --git a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml index dcb1336ee2a56..958a073f4ff77 100644 --- a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml +++ b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/bridge/intel,keembay-dsi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Devicetree bindings for Intel Keem Bay mipi dsi controller +title: Intel Keem Bay mipi dsi controller maintainers: - Anitha Chrisanthus diff --git a/Documentation/devicetree/bindings/display/intel,keembay-display.yaml b/Documentation/devicetree/bindings/display/intel,keembay-display.yaml index bc6622b010cab..2cf54ecc707aa 100644 --- a/Documentation/devicetree/bindings/display/intel,keembay-display.yaml +++ b/Documentation/devicetree/bindings/display/intel,keembay-display.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/intel,keembay-display.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Devicetree bindings for Intel Keem Bay display controller +title: Intel Keem Bay display controller maintainers: - Anitha Chrisanthus diff --git a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml index a222b52d8b8ff..cc7e1f318fe43 100644 --- a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml +++ b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/intel,keembay-msscam.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: 
Devicetree bindings for Intel Keem Bay MSSCAM +title: Intel Keem Bay MSSCAM maintainers: - Anitha Chrisanthus diff --git a/Documentation/devicetree/bindings/display/msm/gmu.yaml b/Documentation/devicetree/bindings/display/msm/gmu.yaml index 67fdeeabae0cc..ab14e81cb0506 100644 --- a/Documentation/devicetree/bindings/display/msm/gmu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gmu.yaml @@ -6,7 +6,7 @@ $id: "http://devicetree.org/schemas/display/msm/gmu.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Devicetree bindings for the GMU attached to certain Adreno GPUs +title: GMU attached to certain Adreno GPUs maintainers: - Rob Clark diff --git a/Documentation/devicetree/bindings/display/msm/gpu.yaml b/Documentation/devicetree/bindings/display/msm/gpu.yaml index ec4b1a75f46ac..c5f49842dc7b5 100644 --- a/Documentation/devicetree/bindings/display/msm/gpu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gpu.yaml @@ -5,7 +5,7 @@ $id: "http://devicetree.org/schemas/display/msm/gpu.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Devicetree bindings for the Adreno or Snapdragon GPUs +title: Adreno or Snapdragon GPUs maintainers: - Rob Clark diff --git a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml b/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml index 2329d9610f832..9f97598efdfab 100644 --- a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml +++ b/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/display/panel/olimex,lcd-olinuxino.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for Olimex Ltd. LCD-OLinuXino bridge panel. +title: Olimex Ltd. LCD-OLinuXino bridge panel. 
maintainers: - Stefan Mavrodiev diff --git a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml index 3cf8629764483..ed9554c837ef7 100644 --- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml +++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra210-nvdec.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Device tree binding for NVIDIA Tegra NVDEC +title: NVIDIA Tegra NVDEC description: | NVDEC is the hardware video decoder present on NVIDIA Tegra210 diff --git a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml index e63ae1a008183..8199e5fa82117 100644 --- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml +++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra210-nvenc.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Device tree binding for NVIDIA Tegra NVENC +title: NVIDIA Tegra NVENC description: | NVENC is the hardware video encoder present on NVIDIA Tegra210 diff --git a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml index 8647404d67e4f..895fb346ac72c 100644 --- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml +++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra210-nvjpg.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Device tree binding for NVIDIA Tegra NVJPG +title: NVIDIA Tegra NVJPG description: | NVJPG is the hardware JPEG decoder and encoder present on NVIDIA Tegra210 diff --git a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml index 7cc2dd525a96d..4bdc19a2bccfb 100644 --- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml +++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra234-nvdec.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Device tree binding for NVIDIA Tegra234 NVDEC +title: NVIDIA Tegra234 NVDEC description: | NVDEC is the hardware video decoder present on NVIDIA Tegra210 diff --git a/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml b/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml index fd040284561f8..e0d76d5eb1031 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/i2c/i2c-gpio.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for GPIO bitbanged I2C +title: GPIO bitbanged I2C maintainers: - Wolfram Sang diff --git a/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml b/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml index db0843be91c56..781108ae1ce3b 100644 --- a/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml @@ -4,7 +4,7 @@ $id: 
http://devicetree.org/schemas/i2c/ti,omap4-i2c.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for I2C controllers on TI's OMAP and K3 SoCs +title: I2C controllers on TI's OMAP and K3 SoCs maintainers: - Vignesh Raghavendra diff --git a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml index 2287697f1f61c..cab0d425eaa42 100644 --- a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml +++ b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/iio/adc/sigma-delta-modulator.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Device-Tree bindings for sigma delta modulator +title: Sigma delta modulator maintainers: - Arnaud Pouliquen diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml index 17ac9dff79720..159cd9d9fe573 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.yaml +++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/gpio-keys.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Device-Tree bindings for GPIO attached keys +title: GPIO attached keys maintainers: - Rob Herring diff --git a/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml b/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml index 96358b12f9b20..67d4d8f86a2d8 100644 --- a/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml +++ b/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/input/microchip,cap11xx.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Device tree bindings for Microchip CAP11xx based capacitive touch sensors +title: Microchip CAP11xx based capacitive touch sensors description: | The Microchip CAP1xxx Family of RightTouchTM multiple-channel capacitive diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml index 62fd47c88275d..95033cb514fbd 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/interrupt-controller/renesas,irqc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: DT bindings for the R-Mobile/R-Car/RZ/G interrupt controller +title: R-Mobile/R-Car/RZ/G interrupt controller maintainers: - Geert Uytterhoeven diff --git a/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml b/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml index 4c15693f7a013..9acdb78955147 100644 --- a/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/leds/backlight/qcom-wled.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for Qualcomm Technologies, Inc. WLED driver +title: Qualcomm Technologies, Inc. 
WLED driver maintainers: - Bjorn Andersson diff --git a/Documentation/devicetree/bindings/leds/register-bit-led.yaml b/Documentation/devicetree/bindings/leds/register-bit-led.yaml index 79b8fc0f9d235..ed26ec19ecbd8 100644 --- a/Documentation/devicetree/bindings/leds/register-bit-led.yaml +++ b/Documentation/devicetree/bindings/leds/register-bit-led.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/leds/register-bit-led.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Device Tree Bindings for Register Bit LEDs +title: Register Bit LEDs maintainers: - Linus Walleij diff --git a/Documentation/devicetree/bindings/leds/regulator-led.yaml b/Documentation/devicetree/bindings/leds/regulator-led.yaml index 3e020d700c00f..4ef7b96e9a086 100644 --- a/Documentation/devicetree/bindings/leds/regulator-led.yaml +++ b/Documentation/devicetree/bindings/leds/regulator-led.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/leds/regulator-led.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Device Tree Bindings for Regulator LEDs +title: Regulator LEDs maintainers: - Linus Walleij diff --git a/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml b/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml index 83be9e93d221b..4053de758db60 100644 --- a/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml +++ b/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml @@ -4,7 +4,7 @@ $id: "http://devicetree.org/schemas/mmc/arasan,sdhci.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" -title: Device Tree Bindings for the Arasan SDHCI Controller +title: Arasan SDHCI Controller maintainers: - Adrian Hunter diff --git a/Documentation/devicetree/bindings/net/ingenic,mac.yaml b/Documentation/devicetree/bindings/net/ingenic,mac.yaml index 93b3e991d209a..bdea101c2f755 100644 --- a/Documentation/devicetree/bindings/net/ingenic,mac.yaml +++ b/Documentation/devicetree/bindings/net/ingenic,mac.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/ingenic,mac.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for MAC in Ingenic SoCs +title: MAC in Ingenic SoCs maintainers: - 周琰杰 (Zhou Yanjie) diff --git a/Documentation/devicetree/bindings/power/supply/bq2415x.yaml b/Documentation/devicetree/bindings/power/supply/bq2415x.yaml index a3c00e0789183..f7287ffd4b12c 100644 --- a/Documentation/devicetree/bindings/power/supply/bq2415x.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq2415x.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/power/supply/bq2415x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for TI bq2415x Li-Ion Charger +title: TI bq2415x Li-Ion Charger maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/bq24190.yaml b/Documentation/devicetree/bindings/power/supply/bq24190.yaml index 4884ec90e2b8b..001c0ffb408d9 100644 --- a/Documentation/devicetree/bindings/power/supply/bq24190.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq24190.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/power/supply/bq24190.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for TI BQ2419x Li-Ion Battery Charger +title: TI BQ2419x Li-Ion Battery Charger maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/bq24257.yaml b/Documentation/devicetree/bindings/power/supply/bq24257.yaml index c7406bef0fa8f..cc45939d385bd 100644 --- 
a/Documentation/devicetree/bindings/power/supply/bq24257.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq24257.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/power/supply/bq24257.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for bq24250, bq24251 and bq24257 Li-Ion Charger +title: Bq24250, bq24251 and bq24257 Li-Ion Charger maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/bq24735.yaml b/Documentation/devicetree/bindings/power/supply/bq24735.yaml index dd9176ce71b34..388ee16f8a1e4 100644 --- a/Documentation/devicetree/bindings/power/supply/bq24735.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq24735.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/power/supply/bq24735.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for TI BQ24735 Li-Ion Battery Charger +title: TI BQ24735 Li-Ion Battery Charger maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.yaml b/Documentation/devicetree/bindings/power/supply/bq25890.yaml index 204c0147188f2..6273b1c5412b8 100644 --- a/Documentation/devicetree/bindings/power/supply/bq25890.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq25890.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/power/supply/bq25890.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for bq25890, bq25892, bq25895 and bq25896 Li-Ion Charger +title: Bq25890, bq25892, bq25895 and bq25896 Li-Ion Charger maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/isp1704.yaml b/Documentation/devicetree/bindings/power/supply/isp1704.yaml index 7e3449ed70d4d..fb3a812aa5a9c 100644 --- a/Documentation/devicetree/bindings/power/supply/isp1704.yaml +++ b/Documentation/devicetree/bindings/power/supply/isp1704.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/power/supply/isp1704.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for NXP ISP1704 USB Charger Detection +title: NXP ISP1704 USB Charger Detection maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml b/Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml index 109b41a0d56c2..774582cd3a2c3 100644 --- a/Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml +++ b/Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/lltc,ltc294x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for LTC2941, LTC2942, LTC2943 and LTC2944 battery fuel gauges +title: LTC2941, LTC2942, LTC2943 and LTC2944 battery fuel gauges description: | All chips measure battery capacity. 
diff --git a/Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml b/Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml index bce15101318e7..27bebc1757ba3 100644 --- a/Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml +++ b/Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/richtek,rt9455.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for Richtek rt9455 battery charger +title: Richtek rt9455 battery charger maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml b/Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml index 93654e732cdaa..ce6fbdba8f6b9 100644 --- a/Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml +++ b/Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/supply/ti,lp8727.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding for TI/National Semiconductor LP8727 Charger +title: TI/National Semiconductor LP8727 Charger maintainers: - Sebastian Reichel diff --git a/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml b/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml index 82b6f2fde422b..7e58471097f8c 100644 --- a/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/regulator/pwm-regulator.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for the Generic PWM Regulator +title: Generic PWM Regulator maintainers: - Brian Norris diff --git a/Documentation/devicetree/bindings/rng/ingenic,rng.yaml b/Documentation/devicetree/bindings/rng/ingenic,rng.yaml index b2e4a6a7f93a4..79a023cbfdbab 100644 --- a/Documentation/devicetree/bindings/rng/ingenic,rng.yaml +++ b/Documentation/devicetree/bindings/rng/ingenic,rng.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rng/ingenic,rng.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for RNG in Ingenic SoCs +title: RNG in Ingenic SoCs maintainers: - 周琰杰 (Zhou Yanjie) diff --git a/Documentation/devicetree/bindings/rng/ingenic,trng.yaml b/Documentation/devicetree/bindings/rng/ingenic,trng.yaml index 044d9a065650c..acaeb63caf249 100644 --- a/Documentation/devicetree/bindings/rng/ingenic,trng.yaml +++ b/Documentation/devicetree/bindings/rng/ingenic,trng.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rng/ingenic,trng.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for DTRNG in Ingenic SoCs +title: DTRNG in Ingenic SoCs maintainers: - 周琰杰 (Zhou Yanjie) diff --git a/Documentation/devicetree/bindings/serial/8250_omap.yaml b/Documentation/devicetree/bindings/serial/8250_omap.yaml index 7b34ec8fa90ec..53dc1212ad2e3 100644 --- a/Documentation/devicetree/bindings/serial/8250_omap.yaml +++ b/Documentation/devicetree/bindings/serial/8250_omap.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/serial/8250_omap.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for 8250 compliant UARTs on TI's OMAP2+ and K3 SoCs +title: 8250 compliant UARTs on TI's OMAP2+ and K3 SoCs maintainers: - Vignesh Raghavendra diff --git a/Documentation/devicetree/bindings/serio/ps2-gpio.yaml b/Documentation/devicetree/bindings/serio/ps2-gpio.yaml index a63d9172346f1..99848bc34f6e6 100644 --- 
a/Documentation/devicetree/bindings/serio/ps2-gpio.yaml +++ b/Documentation/devicetree/bindings/serio/ps2-gpio.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/serio/ps2-gpio.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for GPIO based PS/2 +title: GPIO based PS/2 maintainers: - Danilo Krummrich diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml index 8ca19f2b0b3de..184e8ccbdd137 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,wcd934x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for Qualcomm WCD9340/WCD9341 Audio Codec +title: Qualcomm WCD9340/WCD9341 Audio Codec maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml index 49a267b306f63..b430dd3e1841a 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,wcd938x-sdw.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for Qualcomm SoundWire Slave devices on WCD9380/WCD9385 +title: Qualcomm SoundWire Slave devices on WCD9380/WCD9385 maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml index 67d84463eaeb6..018565793a3ec 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,wcd938x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for Qualcomm WCD9380/WCD9385 Audio Codec +title: Qualcomm WCD9380/WCD9385 Audio Codec maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml b/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml index ea44d03e58caa..d702b489320fa 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,wsa881x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for Qualcomm WSA8810/WSA8815 Class-D Smart Speaker Amplifier +title: Qualcomm WSA8810/WSA8815 Class-D Smart Speaker Amplifier maintainers: - Srinivas Kandagatla diff --git a/Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml b/Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml index 65b0e67f82a30..ba572a7f4f3c0 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/qcom,wsa883x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Bindings for The Qualcomm WSA8830/WSA8832/WSA8835 +title: Qualcomm WSA8830/WSA8832/WSA8835 smart speaker amplifier maintainers: diff --git a/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml b/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml index 98648bf9e151d..bdc82d8bce0ef 100644 --- a/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml +++ 
b/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/timer/ingenic,sysost.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Bindings for SYSOST in Ingenic XBurst family SoCs
+title: SYSOST in Ingenic XBurst family SoCs
maintainers:
  - 周琰杰 (Zhou Yanjie)
diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml
index fd6e7c81426ea..f6cb19efd98b1 100644
--- a/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml
@@ -4,7 +4,7 @@
$id: "http://devicetree.org/schemas/usb/nvidia,tegra-xudc.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
-title: Device tree binding for NVIDIA Tegra XUSB device mode controller (XUDC)
+title: NVIDIA Tegra XUSB device mode controller (XUDC)
description:
  The Tegra XUDC controller supports both USB 2.0 HighSpeed/FullSpeed and
diff --git a/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml b/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml
index 50f2b505bdeb6..623d04a88a817 100644
--- a/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml
+++ b/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/usb/realtek,rts5411.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Binding for the Realtek RTS5411 USB 3.0 hub controller
+title: Realtek RTS5411 USB 3.0 hub controller
maintainers:
  - Matthias Kaehlcke
diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
index eedde385d2994..f81ba3e902973 100644
--- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
+++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
@@ -4,7 +4,7 @@
$id: "http://devicetree.org/schemas/usb/ti,j721e-usb.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
-title: Bindings for the TI wrapper module for the Cadence USBSS-DRD controller
+title: TI wrapper module for the Cadence USBSS-DRD controller
maintainers:
  - Roger Quadros
diff --git a/Documentation/devicetree/bindings/usb/ti,usb8041.yaml b/Documentation/devicetree/bindings/usb/ti,usb8041.yaml
index e04fbd8ab0b7d..88ea6c952c66c 100644
--- a/Documentation/devicetree/bindings/usb/ti,usb8041.yaml
+++ b/Documentation/devicetree/bindings/usb/ti,usb8041.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/usb/ti,usb8041.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Binding for the TI USB8041 USB 3.0 hub controller
+title: TI USB8041 USB 3.0 hub controller
maintainers:
  - Alexander Stein
-- GitLab

From 33cd7c6fffa3c546b3fce3b000d4b83f88c01e0d Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski
Date: Fri, 16 Dec 2022 17:38:14 +0100
Subject: [PATCH 676/694] dt-bindings: clock: drop redundant part of title

The Devicetree bindings document does not have to say in the title that
it is a "Devicetree binding", but instead just describe the hardware.

Drop "Devicetree bindings" in various forms:

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [dD]evice[ -]\?[tT]ree [cC]lock [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2 Clock Controller/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [cC]lock [bB]indings\?
for \([tT]he \)\?\(.*\)$/title: \u\2 Clock Controller/' {} \; Signed-off-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Reviewed-by: Jonathan Cameron Acked-by: Ulf Hansson # MMC Acked-by: Stephen Boyd # clk Link: https://lore.kernel.org/r/20221216163815.522628-9-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/clock/calxeda.yaml | 2 +- Documentation/devicetree/bindings/clock/imx1-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx21-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx23-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx25-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx27-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx28-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx31-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx35-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx5-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx6q-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx6sl-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx6sll-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx6sx-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx6ul-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx7d-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/ti,lmk04832.yaml | 2 +- 20 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/calxeda.yaml b/Documentation/devicetree/bindings/clock/calxeda.yaml index a34cbf3c9aaf5..a88fbe20fef16 100644 --- a/Documentation/devicetree/bindings/clock/calxeda.yaml +++ b/Documentation/devicetree/bindings/clock/calxeda.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/calxeda.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Device Tree Clock bindings for Calxeda highbank platform +title: Calxeda highbank platform Clock Controller description: | This binding covers the Calxeda SoC internal peripheral and bus clocks diff --git a/Documentation/devicetree/bindings/clock/imx1-clock.yaml b/Documentation/devicetree/bindings/clock/imx1-clock.yaml index 56f524780b1a6..7ade4c32aff37 100644 --- a/Documentation/devicetree/bindings/clock/imx1-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx1-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx1-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX1 CPUs +title: Freescale i.MX1 CPUs Clock Controller maintainers: - Alexander Shiyan diff --git a/Documentation/devicetree/bindings/clock/imx21-clock.yaml b/Documentation/devicetree/bindings/clock/imx21-clock.yaml index e2d50544700a2..79cc843703ec1 100644 --- a/Documentation/devicetree/bindings/clock/imx21-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx21-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx21-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX21 +title: Freescale i.MX21 Clock Controller maintainers: - Alexander Shiyan diff --git a/Documentation/devicetree/bindings/clock/imx23-clock.yaml b/Documentation/devicetree/bindings/clock/imx23-clock.yaml index 7e890ab9c77d2..5e71c9219500a 100644 --- 
a/Documentation/devicetree/bindings/clock/imx23-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx23-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx23-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX23 +title: Freescale i.MX23 Clock Controller maintainers: - Shawn Guo diff --git a/Documentation/devicetree/bindings/clock/imx25-clock.yaml b/Documentation/devicetree/bindings/clock/imx25-clock.yaml index 1792e138984b9..c626a158590e2 100644 --- a/Documentation/devicetree/bindings/clock/imx25-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx25-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx25-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX25 +title: Freescale i.MX25 Clock Controller maintainers: - Sascha Hauer diff --git a/Documentation/devicetree/bindings/clock/imx27-clock.yaml b/Documentation/devicetree/bindings/clock/imx27-clock.yaml index 99925aa22a4c8..71d78a0b551fc 100644 --- a/Documentation/devicetree/bindings/clock/imx27-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx27-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx27-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX27 +title: Freescale i.MX27 Clock Controller maintainers: - Fabio Estevam diff --git a/Documentation/devicetree/bindings/clock/imx28-clock.yaml b/Documentation/devicetree/bindings/clock/imx28-clock.yaml index a542d680b1ca8..4aaad7b9c66e9 100644 --- a/Documentation/devicetree/bindings/clock/imx28-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx28-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx28-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX28 +title: Freescale i.MX28 Clock Controller maintainers: - Shawn Guo diff --git a/Documentation/devicetree/bindings/clock/imx31-clock.yaml b/Documentation/devicetree/bindings/clock/imx31-clock.yaml index 168c8ada5e81c..50a8498eef8a2 100644 --- a/Documentation/devicetree/bindings/clock/imx31-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx31-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx31-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX31 +title: Freescale i.MX31 Clock Controller maintainers: - Fabio Estevam diff --git a/Documentation/devicetree/bindings/clock/imx35-clock.yaml b/Documentation/devicetree/bindings/clock/imx35-clock.yaml index 6415bb6a8d041..c063369de3ec7 100644 --- a/Documentation/devicetree/bindings/clock/imx35-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx35-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx35-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX35 +title: Freescale i.MX35 Clock Controller maintainers: - Steffen Trumtrar diff --git a/Documentation/devicetree/bindings/clock/imx5-clock.yaml b/Documentation/devicetree/bindings/clock/imx5-clock.yaml index c0e19ff92c767..423c0142c1d33 100644 --- a/Documentation/devicetree/bindings/clock/imx5-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx5-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx5-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX5 +title: Freescale 
i.MX5 Clock Controller maintainers: - Fabio Estevam diff --git a/Documentation/devicetree/bindings/clock/imx6q-clock.yaml b/Documentation/devicetree/bindings/clock/imx6q-clock.yaml index 4f4637eddb8b7..bae4fcb3aacc6 100644 --- a/Documentation/devicetree/bindings/clock/imx6q-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx6q-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx6q-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX6 Quad +title: Freescale i.MX6 Quad Clock Controller maintainers: - Anson Huang diff --git a/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml index b83c8f43d664c..c85ff6ea3d245 100644 --- a/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx6sl-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX6 SoloLite +title: Freescale i.MX6 SoloLite Clock Controller maintainers: - Anson Huang diff --git a/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml index 484894a4b23f4..6b549ed1493c3 100644 --- a/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx6sll-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX6 SLL +title: Freescale i.MX6 SLL Clock Controller maintainers: - Anson Huang diff --git a/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml index e6c795657c24d..55dcad18b7c6d 100644 --- a/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx6sx-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX6 SoloX +title: Freescale i.MX6 SoloX Clock Controller maintainers: - Anson Huang diff --git a/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml b/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml index 6a51a3f51cd98..be54d4df5afa2 100644 --- a/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx6ul-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX6 UltraLite +title: Freescale i.MX6 UltraLite Clock Controller maintainers: - Anson Huang diff --git a/Documentation/devicetree/bindings/clock/imx7d-clock.yaml b/Documentation/devicetree/bindings/clock/imx7d-clock.yaml index cefb61db01a82..e7d8427e49579 100644 --- a/Documentation/devicetree/bindings/clock/imx7d-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx7d-clock.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/imx7d-clock.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Clock bindings for Freescale i.MX7 Dual +title: Freescale i.MX7 Dual Clock Controller maintainers: - Frank Li diff --git a/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml b/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml index 
739c3378f8c8f..76842038f52ea 100644
--- a/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/clock/imx7ulp-pcc-clock.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Clock bindings for Freescale i.MX7ULP Peripheral Clock Control (PCC) modules
+title: Freescale i.MX7ULP Peripheral Clock Control (PCC) modules Clock Controller
maintainers:
  - A.s. Dong
diff --git a/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml b/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml
index d06344d7e34fe..5e25bc6d1372e 100644
--- a/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/clock/imx7ulp-scg-clock.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Clock bindings for Freescale i.MX7ULP System Clock Generation (SCG) modules
+title: Freescale i.MX7ULP System Clock Generation (SCG) modules Clock Controller
maintainers:
  - A.s. Dong
diff --git a/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml b/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml
index 03fc5c1a29399..777af4aad4b26 100644
--- a/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/clock/imxrt1050-clock.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Clock bindings for Freescale i.MXRT
+title: Freescale i.MXRT Clock Controller
maintainers:
  - Giulio Benetti
diff --git a/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml b/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml
index bd8173848253f..73d17830f165e 100644
--- a/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml
+++ b/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/clock/ti,lmk04832.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Clock bindings for the Texas Instruments LMK04832
+title: Texas Instruments LMK04832 Clock Controller
maintainers:
  - Liam Beguin
-- GitLab

From 3367934dd3035afa72ac79ae649f142a530df157 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski
Date: Fri, 16 Dec 2022 17:38:15 +0100
Subject: [PATCH 677/694] dt-bindings: drop redundant part of title (manual)

The Devicetree bindings document does not have to say in the title that
it is a "Devicetree binding" or a "schema", but instead just describe
the hardware.

Manual updates to various binding titles, including capitalizing them.
Signed-off-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Reviewed-by: Jonathan Cameron Acked-by: Ulf Hansson # MMC Acked-by: Stephen Boyd # clk Acked-by: Dmitry Torokhov # input Acked-by: Mark Brown Acked-by: Viresh Kumar # opp Link: https://lore.kernel.org/r/20221216163815.522628-10-krzysztof.kozlowski@linaro.org [robh: add trivial-devices.yaml and net/can/microchip,mcp251xfd.yaml] Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml | 2 +- Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml | 2 +- .../devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml | 2 +- Documentation/devicetree/bindings/example-schema.yaml | 2 +- Documentation/devicetree/bindings/input/fsl,scu-key.yaml | 2 +- Documentation/devicetree/bindings/input/matrix-keymap.yaml | 2 +- Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml | 2 +- Documentation/devicetree/bindings/net/asix,ax88178.yaml | 2 +- .../devicetree/bindings/net/can/microchip,mcp251xfd.yaml | 4 +--- Documentation/devicetree/bindings/net/microchip,lan95xx.yaml | 2 +- Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml | 2 +- Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml | 2 +- Documentation/devicetree/bindings/pci/pci-ep.yaml | 2 +- Documentation/devicetree/bindings/phy/calxeda-combophy.yaml | 2 +- .../devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml | 2 +- Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml | 2 +- Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml | 2 +- Documentation/devicetree/bindings/power/fsl,scu-pd.yaml | 2 +- Documentation/devicetree/bindings/riscv/cpus.yaml | 2 +- Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml | 2 +- Documentation/devicetree/bindings/spi/omap-spi.yaml | 2 +- .../devicetree/bindings/thermal/fsl,scu-thermal.yaml | 2 +- Documentation/devicetree/bindings/trivial-devices.yaml | 2 +- Documentation/devicetree/bindings/usb/usb-device.yaml | 2 +- Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml | 2 +- 25 files changed, 25 insertions(+), 27 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml b/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml index 82836086cac14..d416c374e8534 100644 --- a/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml +++ b/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/cirrus,cs2000-cp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Binding CIRRUS LOGIC Fractional-N Clock Synthesizer & Clock Multiplier +title: CIRRUS LOGIC Fractional-N Clock Synthesizer & Clock Multiplier maintainers: - Kuninori Morimoto diff --git a/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml b/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml index f2c48460a3997..36d4cfc3c2f8d 100644 --- a/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml +++ b/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/fsl,scu-clk.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: i.MX SCU Client Device Node - Clock bindings based on SCU Message Protocol +title: i.MX SCU Client Device Node - Clock Controller Based on SCU Message Protocol maintainers: - Abel Vesa diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml index 28c13237059fa..3cb996b2c9d59 100644 --- 
a/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/clock/qcom,dispcc-sc8280xp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Display Clock & Reset Controller Binding for SC8280XP +title: Qualcomm Display Clock & Reset Controller on SC8280XP maintainers: - Bjorn Andersson diff --git a/Documentation/devicetree/bindings/example-schema.yaml b/Documentation/devicetree/bindings/example-schema.yaml index 8e1a8b19d4294..dfcf4c27d44a3 100644 --- a/Documentation/devicetree/bindings/example-schema.yaml +++ b/Documentation/devicetree/bindings/example-schema.yaml @@ -11,7 +11,7 @@ $id: http://devicetree.org/schemas/example-schema.yaml# # $schema is the meta-schema this schema should be validated with. $schema: http://devicetree.org/meta-schemas/core.yaml# -title: An example schema annotated with jsonschema details +title: An Example Device maintainers: - Rob Herring diff --git a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml index e6266d1882665..e5a3c355ee1f0 100644 --- a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml +++ b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/fsl,scu-key.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: i.MX SCU Client Device Node - SCU key bindings based on SCU Message Protocol +title: i.MX SCU Client Device Node - SCU Key Based on SCU Message Protocol maintainers: - Dong Aisheng diff --git a/Documentation/devicetree/bindings/input/matrix-keymap.yaml b/Documentation/devicetree/bindings/input/matrix-keymap.yaml index 6699d5e32dcaf..4d6dbe91646dc 100644 --- a/Documentation/devicetree/bindings/input/matrix-keymap.yaml +++ b/Documentation/devicetree/bindings/input/matrix-keymap.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/input/matrix-keymap.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Common key matrices binding for matrix-connected key boards +title: Common Key Matrices on Matrix-connected Key Boards maintainers: - Olof Johansson diff --git a/Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml b/Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml index 2929382625b69..d1b01bae9f63e 100644 --- a/Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml +++ b/Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/leds/issi,is31fl319x.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ISSI LED controllers bindings for IS31FL319{0,1,3,6,9} +title: ISSI LED Controllers for IS31FL319{0,1,3,6,9} maintainers: - Vincent Knecht diff --git a/Documentation/devicetree/bindings/net/asix,ax88178.yaml b/Documentation/devicetree/bindings/net/asix,ax88178.yaml index a81dbc4792f68..768504ccbf741 100644 --- a/Documentation/devicetree/bindings/net/asix,ax88178.yaml +++ b/Documentation/devicetree/bindings/net/asix,ax88178.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/asix,ax88178.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: The device tree bindings for the USB Ethernet controllers +title: ASIX AX88172/AX88772 USB Ethernet Controllers maintainers: - Oleksij Rempel diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml 
b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml index 7a73057707b44..fce84aecae776 100644 --- a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml +++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml @@ -4,9 +4,7 @@ $id: http://devicetree.org/schemas/net/can/microchip,mcp251xfd.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: - Microchip MCP2517FD, MCP2518FD and MCP251863 stand-alone CAN - controller device tree bindings +title: Microchip MCP2517FD, MCP2518FD and MCP251863 stand-alone CAN controller maintainers: - Marc Kleine-Budde diff --git a/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml b/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml index 3715c5f8f0e0c..0b97e14d947f2 100644 --- a/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml +++ b/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/microchip,lan95xx.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: The device tree bindings for the USB Ethernet controllers +title: Microchip SMSC9500/LAN9530/LAN9730 USB Ethernet Controllers maintainers: - Oleksij Rempel diff --git a/Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml b/Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml index 682688299b26d..f0a49283649d9 100644 --- a/Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml +++ b/Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/nvmem/fsl,scu-ocotp.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: i.MX SCU Client Device Node - OCOTP bindings based on SCU Message Protocol +title: i.MX SCU Client Device Node - OCOTP Based on SCU Message Protocol maintainers: - Dong Aisheng diff --git a/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml b/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml index df8442fb11f0c..b9ce2e099ce9a 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/opp/opp-v2-qcom-level.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm OPP bindings to describe OPP nodes. +title: Qualcomm OPP maintainers: - Niklas Cassel diff --git a/Documentation/devicetree/bindings/pci/pci-ep.yaml b/Documentation/devicetree/bindings/pci/pci-ep.yaml index ccec51ab52470..d1eef48252072 100644 --- a/Documentation/devicetree/bindings/pci/pci-ep.yaml +++ b/Documentation/devicetree/bindings/pci/pci-ep.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/pci/pci-ep.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: PCI Endpoint Controller Schema +title: PCI Endpoint Controller description: | Common properties for PCI Endpoint Controller Nodes. 
diff --git a/Documentation/devicetree/bindings/phy/calxeda-combophy.yaml b/Documentation/devicetree/bindings/phy/calxeda-combophy.yaml index 41ee16e21f8d2..d05a7c7930353 100644 --- a/Documentation/devicetree/bindings/phy/calxeda-combophy.yaml +++ b/Documentation/devicetree/bindings/phy/calxeda-combophy.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/phy/calxeda-combophy.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Calxeda Highbank Combination PHYs binding for SATA +title: Calxeda Highbank Combination PHYs for SATA description: | The Calxeda Combination PHYs connect the SoC to the internal fabric diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml index 45ea565ce2387..fcd729afeee13 100644 --- a/Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/pinctrl/fsl,scu-pinctrl.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: i.MX SCU Client Device Node - Pinctrl bindings based on SCU Message Protocol +title: i.MX SCU Client Device Node - Pinctrl Based on SCU Message Protocol maintainers: - Dong Aisheng diff --git a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml index f5a121311f612..be81ed22a036a 100644 --- a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml +++ b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/pinctrl/pincfg-node.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Generic pin configuration node schema +title: Generic Pin Configuration Node maintainers: - Linus Walleij diff --git a/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml b/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml index 551df3d9b8090..008c3ab7f1bb3 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml +++ b/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/pinctrl/pinmux-node.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Generic pin multiplexing node schema +title: Generic Pin Multiplexing Node maintainers: - Linus Walleij diff --git a/Documentation/devicetree/bindings/power/fsl,scu-pd.yaml b/Documentation/devicetree/bindings/power/fsl,scu-pd.yaml index 1f72b18ca0fcf..407b7cfec783c 100644 --- a/Documentation/devicetree/bindings/power/fsl,scu-pd.yaml +++ b/Documentation/devicetree/bindings/power/fsl,scu-pd.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/power/fsl,scu-pd.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: i.MX SCU Client Device Node - Power domain bindings based on SCU Message Protocol +title: i.MX SCU Client Device Node - Power Domain Based on SCU Message Protocol maintainers: - Dong Aisheng diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 83ad177a9043b..c6720764e765c 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/riscv/cpus.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: RISC-V bindings for 'cpus' DT nodes +title: RISC-V CPUs maintainers: - Paul Walmsley diff --git a/Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml 
b/Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml index 8c102b70d735a..dd1b1abf1e1bc 100644 --- a/Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/rtc/fsl,scu-rtc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: i.MX SCU Client Device Node - RTC bindings based on SCU Message Protocol +title: i.MX SCU Client Device Node - RTC Based on SCU Message Protocol maintainers: - Dong Aisheng diff --git a/Documentation/devicetree/bindings/spi/omap-spi.yaml b/Documentation/devicetree/bindings/spi/omap-spi.yaml index 9952199cae119..352affa4b7f86 100644 --- a/Documentation/devicetree/bindings/spi/omap-spi.yaml +++ b/Documentation/devicetree/bindings/spi/omap-spi.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/spi/omap-spi.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: SPI controller bindings for OMAP and K3 SoCs +title: SPI Controller on OMAP and K3 SoCs maintainers: - Aswath Govindraju diff --git a/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml b/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml index f9e4b3c8d0eeb..3721c8c8ec640 100644 --- a/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/thermal/fsl,scu-thermal.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: i.MX SCU Client Device Node - Thermal bindings based on SCU Message Protocol +title: i.MX SCU Client Device Node - Thermal Based on SCU Message Protocol maintainers: - Dong Aisheng diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index 61746755c1077..f5c0a6283e61f 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/trivial-devices.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Trivial I2C and SPI devices that have simple device tree bindings +title: Trivial I2C and SPI devices maintainers: - Rob Herring diff --git a/Documentation/devicetree/bindings/usb/usb-device.yaml b/Documentation/devicetree/bindings/usb/usb-device.yaml index b77960a7a37ba..7a771125ec76c 100644 --- a/Documentation/devicetree/bindings/usb/usb-device.yaml +++ b/Documentation/devicetree/bindings/usb/usb-device.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/usb/usb-device.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: The device tree bindings for the Generic USB Device +title: Generic USB Device maintainers: - Greg Kroah-Hartman diff --git a/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml index f84c45d687d7b..47701248cd8d4 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/watchdog/fsl,scu-wdt.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: i.MX SCU Client Device Node - Watchdog bindings based on SCU Message Protocol +title: i.MX SCU Client Device Node - Watchdog Based on SCU Message Protocol maintainers: - Dong Aisheng -- GitLab From 4545c6a3d6ba71747eaa984c338ddd745e56e23f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 17 Dec 2022 
10:46:44 +0000 Subject: [PATCH 678/694] powerpc/msi: Fix deassociation of MSI descriptors Since 2f2940d16823 ("genirq/msi: Remove filter from msi_free_descs_free_range()"), the core MSI code relies on the msi_desc->irq field to have been cleared before the descriptor can be freed, as it indicates that there is no association with a device anymore. The irq domain code provides this guarantee, and so does s390, which is one of the two architectures not using irq domains for MSIs. Powerpc, however, is missing this particular requirement, leading to a splat and leaked MSI descriptors. Adding the now required irq reset to the handful of powerpc backends that implement MSIs fixes that particular problem. Reported-by: Guenter Roeck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/70dab88e-6119-0c12-7c6a-61bcbe239f66@roeck-us.net --- arch/powerpc/platforms/4xx/hsta_msi.c | 1 + arch/powerpc/platforms/cell/axon_msi.c | 1 + arch/powerpc/platforms/pasemi/msi.c | 1 + arch/powerpc/sysdev/fsl_msi.c | 1 + arch/powerpc/sysdev/mpic_u3msi.c | 1 + 5 files changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c index d4f7fff1fc871..e11b57a62b054 100644 --- a/arch/powerpc/platforms/4xx/hsta_msi.c +++ b/arch/powerpc/platforms/4xx/hsta_msi.c @@ -115,6 +115,7 @@ static void hsta_teardown_msi_irqs(struct pci_dev *dev) msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1); pr_debug("%s: Teardown IRQ %u (index %u)\n", __func__, entry->irq, irq); + entry->irq = 0; } } diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 5b012abca773d..0c11aad896c79 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -289,6 +289,7 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) { irq_set_msi_desc(entry->irq, NULL); irq_dispose_mapping(entry->irq); + entry->irq = 0; } } diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index dc1846660005c..166c97fff16d2 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -66,6 +66,7 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); irq_dispose_mapping(entry->irq); + entry->irq = 0; msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK); } } diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 73c2d70706c0a..57978a44d55b6 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -132,6 +132,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); irq_dispose_mapping(entry->irq); + entry->irq = 0; msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 1d8cfdfdf115e..492cb03c0b623 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -108,6 +108,7 @@ static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); irq_dispose_mapping(entry->irq); + entry->irq = 0; msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } } -- GitLab From 1bc5434632593ea3bb3a1ed2499af8c31796448b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 16 Nov 2022 21:46:03 +0100 Subject: [PATCH 679/694] parisc:
Fix inconsistent indenting in setup_cmdline() Fix warning reported by 0-DAY CI Kernel Test Service: arch/parisc/kernel/setup.c:64 setup_cmdline() warn: inconsistent indenting Reported-by: kernel test robot Signed-off-by: Helge Deller --- arch/parisc/kernel/setup.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 375f38d6e1a4d..0797db617962b 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -50,15 +50,15 @@ void __init setup_cmdline(char **cmdline_p) extern unsigned int boot_args[]; char *p; - /* Collect stuff passed in from the boot loader */ + *cmdline_p = command_line; /* boot_args[0] is free-mem start, boot_args[1] is ptr to command line */ - if (boot_args[0] < 64) { - /* called from hpux boot loader */ - boot_command_line[0] = '\0'; - } else { - strscpy(boot_command_line, (char *)__va(boot_args[1]), - COMMAND_LINE_SIZE); + if (boot_args[0] < 64) + return; /* return if called from hpux boot loader */ + + /* Collect stuff passed in from the boot loader */ + strscpy(boot_command_line, (char *)__va(boot_args[1]), + COMMAND_LINE_SIZE); /* autodetect console type (if not done by palo yet) */ p = boot_command_line; @@ -75,16 +75,14 @@ void __init setup_cmdline(char **cmdline_p) strlcat(p, " earlycon=pdc", COMMAND_LINE_SIZE); #ifdef CONFIG_BLK_DEV_INITRD - if (boot_args[2] != 0) /* did palo pass us a ramdisk? */ - { - initrd_start = (unsigned long)__va(boot_args[2]); - initrd_end = (unsigned long)__va(boot_args[3]); - } -#endif + /* did palo pass us a ramdisk? */ + if (boot_args[2] != 0) { + initrd_start = (unsigned long)__va(boot_args[2]); + initrd_end = (unsigned long)__va(boot_args[3]); } #endif strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; } #ifdef CONFIG_PA11 -- GitLab From 731c4eac848ff9dd42776da8ed3407b257e3abf0 Mon Sep 17 00:00:00 2001 From: Veronika Kabatova Date: Wed, 14 Dec 2022 14:20:02 +0100 Subject: [PATCH 680/694] buildtar: fix tarballs with EFI_ZBOOT enabled When CONFIG_EFI_ZBOOT is enabled, the binary name is not Image.gz anymore but vmlinuz.efi. No vmlinuz gets put into the tarball as the buildtar script doesn't recognize this name. Remedy this by adding the binary name to the list of acceptable files to package. Reported-by: CKI Project Signed-off-by: Veronika Kabatova Acked-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- scripts/package/buildtar | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/buildtar b/scripts/package/buildtar index cb54c7f1aa806..4d6f0b128efd9 100755 --- a/scripts/package/buildtar +++ b/scripts/package/buildtar @@ -122,7 +122,7 @@ case "${ARCH}" in fi ;; arm64) - for i in Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo ; do + for i in Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo vmlinuz.efi ; do if [ -f "${objtree}/arch/arm64/boot/${i}" ] ; then cp -v -- "${objtree}/arch/arm64/boot/${i}" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}" break -- GitLab From 41f563ab3c33698bdfc3403c7c2e6c94e73681e4 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 10:45:14 +0800 Subject: [PATCH 681/694] parisc: led: Fix potential null-ptr-deref in start_task() start_task() calls create_singlethread_workqueue() but does not check the return value, which may be NULL.
And a null-ptr-deref may happen: start_task() create_singlethread_workqueue() # failed, led_wq is NULL queue_delayed_work() queue_delayed_work_on() __queue_delayed_work() # warning here, but continue __queue_work() # access wq->flags, null-ptr-deref Check the ret value and return -ENOMEM if it is NULL. Fixes: 3499495205a6 ("[PARISC] Use work queue in LED/LCD driver instead of tasklet.") Signed-off-by: Shang XiaoJing Signed-off-by: Helge Deller Cc: --- drivers/parisc/led.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index d4be9d2ee74d9..8bdc5e043831c 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -137,6 +137,9 @@ static int start_task(void) /* Create the work queue and queue the LED task */ led_wq = create_singlethread_workqueue("led_wq"); + if (!led_wq) + return -ENOMEM; + queue_delayed_work(led_wq, &led_task, 0); return 0; -- GitLab From 71bdea6f798b425bc0003780b13e3fdecb16a010 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 11 Dec 2022 19:50:20 +0100 Subject: [PATCH 682/694] parisc: Align parisc MADV_XXX constants with all other architectures Adjust some MADV_XXX constants to be in sync with their values on all other platforms. There is currently no reason for parisc to have its own numbering, but it requires workarounds in many userspace sources (e.g. glibc, qemu, ...) - which are often forgotten and thus introduce bugs and different behaviour on parisc. A wrapper avoids an ABI breakage for existing userspace applications by translating any old values to the new ones, so this change allows us to move over all programs to the new ABI over time. Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/mman.h | 29 +++++++++++------------ arch/parisc/kernel/sys_parisc.c | 28 ++++++++++++++++++++++ arch/parisc/kernel/syscalls/syscall.tbl | 2 +- tools/arch/parisc/include/uapi/asm/mman.h | 12 +++++----- tools/perf/bench/bench.h | 12 ---------- 5 files changed, 49 insertions(+), 34 deletions(-) diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 22133a6a506ef..68c44f99bc931 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -49,6 +49,19 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ @@ -57,27 +70,13 @@ #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ -#define MADV_MERGEABLE 65 /* KSM may merge identical pages */ -#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ - -#define MADV_HUGEPAGE 67 /* Worth backing with hugepages */ -#define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */ - -#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump, - overrides the coredump filter bits */ -#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */ - -#define MADV_WIPEONFORK
71 /* Zero memory on fork, child only */ -#define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */ - -#define MADV_COLLAPSE 73 /* Synchronous hugepage collapse */ +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ #define MADV_HWPOISON 100 /* poison a page for testing */ #define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ /* compatibility flags */ #define MAP_FILE 0 -#define MAP_VARIABLE 0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 848b0702005d6..09a34b07f02e6 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -465,3 +465,31 @@ asmlinkage long parisc_inotify_init1(int flags) flags = FIX_O_NONBLOCK(flags); return sys_inotify_init1(flags); } + +/* + * madvise() wrapper + * + * Up to kernel v6.1 parisc has different values than all other + * platforms for the MADV_xxx flags listed below. + * To keep binary compatibility with existing userspace programs + * translate the former values to the new values. + * + * XXX: Remove this wrapper in year 2025 (or later) + */ + +asmlinkage notrace long parisc_madvise(unsigned long start, size_t len_in, int behavior) +{ + switch (behavior) { + case 65: behavior = MADV_MERGEABLE; break; + case 66: behavior = MADV_UNMERGEABLE; break; + case 67: behavior = MADV_HUGEPAGE; break; + case 68: behavior = MADV_NOHUGEPAGE; break; + case 69: behavior = MADV_DONTDUMP; break; + case 70: behavior = MADV_DODUMP; break; + case 71: behavior = MADV_WIPEONFORK; break; + case 72: behavior = MADV_KEEPONFORK; break; + case 73: behavior = MADV_COLLAPSE; break; + } + + return sys_madvise(start, len_in, behavior); +} diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 8a99c998da9bb..0e42fceb2d5e2 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -131,7 +131,7 @@ 116 common sysinfo sys_sysinfo compat_sys_sysinfo 117 common shutdown sys_shutdown 118 common fsync sys_fsync -119 common madvise sys_madvise +119 common madvise parisc_madvise 120 common clone sys_clone_wrapper 121 common setdomainname sys_setdomainname 122 common sendfile sys_sendfile compat_sys_sendfile diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h index 506c06a6536fb..4cc88a642e106 100644 --- a/tools/arch/parisc/include/uapi/asm/mman.h +++ b/tools/arch/parisc/include/uapi/asm/mman.h @@ -1,20 +1,20 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H -#define MADV_DODUMP 70 +#define MADV_DODUMP 17 #define MADV_DOFORK 11 -#define MADV_DONTDUMP 69 +#define MADV_DONTDUMP 16 #define MADV_DONTFORK 10 #define MADV_DONTNEED 4 #define MADV_FREE 8 -#define MADV_HUGEPAGE 67 -#define MADV_MERGEABLE 65 -#define MADV_NOHUGEPAGE 68 +#define MADV_HUGEPAGE 14 +#define MADV_MERGEABLE 12 +#define MADV_NOHUGEPAGE 15 #define MADV_NORMAL 0 #define MADV_RANDOM 1 #define MADV_REMOVE 9 #define MADV_SEQUENTIAL 2 -#define MADV_UNMERGEABLE 66 +#define MADV_UNMERGEABLE 13 #define MADV_WILLNEED 3 #define MAP_ANONYMOUS 0x10 #define MAP_DENYWRITE 0x0800 diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 6cefb4315d75e..a5d49b3b6a098 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -10,25 +10,13 @@ extern struct timeval bench__start, bench__end, bench__runtime; * The madvise transparent hugepage 
constants were added in glibc * 2.13. For compatibility with older versions of glibc, define these * tokens if they are not already defined. - * - * PA-RISC uses different madvise values from other architectures and - * needs to be special-cased. */ -#ifdef __hppa__ -# ifndef MADV_HUGEPAGE -# define MADV_HUGEPAGE 67 -# endif -# ifndef MADV_NOHUGEPAGE -# define MADV_NOHUGEPAGE 68 -# endif -#else # ifndef MADV_HUGEPAGE # define MADV_HUGEPAGE 14 # endif # ifndef MADV_NOHUGEPAGE # define MADV_NOHUGEPAGE 15 # endif -#endif int bench_numa(int argc, const char **argv); int bench_sched_messaging(int argc, const char **argv); -- GitLab From fe94cb1a614d2df2764d49ac959d8b7e4cb98e15 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 14 Dec 2022 22:17:57 +0100 Subject: [PATCH 683/694] parisc: Drop PMD_SHIFT from calculation in pgtable.h PMD_SHIFT isn't defined if CONFIG_PGTABLE_LEVELS == 3, and as such the kernel test robot found this warning: In file included from include/linux/pgtable.h:6, from arch/parisc/kernel/head.S:23: arch/parisc/include/asm/pgtable.h:169:32: warning: "PMD_SHIFT" is not defined, evaluates to 0 [-Wundef] 169 | #if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT) Avoid the warning by using PLD_SHIFT and BITS_PER_PTE. Signed-off-by: Helge Deller Reported-by: kernel test robot Cc: # 6.0+ --- arch/parisc/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index ecd0288544698..68ae77069d23f 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -166,8 +166,8 @@ extern void __update_cache(pte_t pte); /* This calculates the number of initial pages we need for the initial * page tables */ -#if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT) -# define PT_INITIAL (1 << (KERNEL_INITIAL_ORDER - PMD_SHIFT)) +#if (KERNEL_INITIAL_ORDER) >= (PLD_SHIFT + BITS_PER_PTE) +# define PT_INITIAL (1 << (KERNEL_INITIAL_ORDER - PLD_SHIFT - BITS_PER_PTE)) #else # define PT_INITIAL (1) /* all initial PTEs fit into one page */ #endif -- GitLab From 7236aae5f81f3efbd93d0601e74fc05994bc2580 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 26 Nov 2022 21:29:31 +0100 Subject: [PATCH 684/694] parisc: Fix locking in pdc_iodc_print() firmware call Utilize pdc_lock spinlock to protect parallel modifications of the iodc_dbuf[] buffer, check length to prevent buffer overflow of iodc_dbuf[], drop the iodc_retbuf[] buffer and fix some wrong indentings. Signed-off-by: Helge Deller Cc: # 6.0+ --- arch/parisc/kernel/firmware.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 6a7e315bcc2e5..a115315d88e69 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1288,9 +1288,8 @@ void pdc_io_reset_devices(void) #endif /* defined(BOOTLOADER) */ -/* locked by pdc_console_lock */ -static int __attribute__((aligned(8))) iodc_retbuf[32]; -static char __attribute__((aligned(64))) iodc_dbuf[4096]; +/* locked by pdc_lock */ +static char iodc_dbuf[4096] __page_aligned_bss; /** * pdc_iodc_print - Console print using IODC. 
@@ -1307,6 +1306,9 @@ int pdc_iodc_print(const unsigned char *str, unsigned count) unsigned int i; unsigned long flags; + count = min_t(unsigned int, count, sizeof(iodc_dbuf)); + + spin_lock_irqsave(&pdc_lock, flags); for (i = 0; i < count;) { switch(str[i]) { case '\n': @@ -1322,12 +1324,11 @@ int pdc_iodc_print(const unsigned char *str, unsigned count) } print: - spin_lock_irqsave(&pdc_lock, flags); - real32_call(PAGE0->mem_cons.iodc_io, - (unsigned long)PAGE0->mem_cons.hpa, ENTRY_IO_COUT, - PAGE0->mem_cons.spa, __pa(PAGE0->mem_cons.dp.layers), - __pa(iodc_retbuf), 0, __pa(iodc_dbuf), i, 0); - spin_unlock_irqrestore(&pdc_lock, flags); + real32_call(PAGE0->mem_cons.iodc_io, + (unsigned long)PAGE0->mem_cons.hpa, ENTRY_IO_COUT, + PAGE0->mem_cons.spa, __pa(PAGE0->mem_cons.dp.layers), + __pa(pdc_result), 0, __pa(iodc_dbuf), i, 0); + spin_unlock_irqrestore(&pdc_lock, flags); return i; } @@ -1354,10 +1355,11 @@ int pdc_iodc_getc(void) real32_call(PAGE0->mem_kbd.iodc_io, (unsigned long)PAGE0->mem_kbd.hpa, ENTRY_IO_CIN, PAGE0->mem_kbd.spa, __pa(PAGE0->mem_kbd.dp.layers), - __pa(iodc_retbuf), 0, __pa(iodc_dbuf), 1, 0); + __pa(pdc_result), 0, __pa(iodc_dbuf), 1, 0); ch = *iodc_dbuf; - status = *iodc_retbuf; + /* like convert_to_wide() but for first return value only: */ + status = *(int *)&pdc_result; spin_unlock_irqrestore(&pdc_lock, flags); if (status == 0) -- GitLab From 7e6652c79ecd74e1112500668d956367dc3772a5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 26 Nov 2022 21:35:29 +0100 Subject: [PATCH 685/694] parisc: Drop duplicate kgdb_pdc console The kgdb console is already implemented and registered in pdc_cons.c, so the duplicate code can be dropped. Signed-off-by: Helge Deller Cc: # 6.1+ --- arch/parisc/kernel/kgdb.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c index ab7620f695be1..b16fa9bac5f44 100644 --- a/arch/parisc/kernel/kgdb.c +++ b/arch/parisc/kernel/kgdb.c @@ -208,23 +208,3 @@ int kgdb_arch_handle_exception(int trap, int signo, } return -1; } - -/* KGDB console driver which uses PDC to read chars from keyboard */ - -static void kgdb_pdc_write_char(u8 chr) -{ - /* no need to print char. kgdb will do it. */ -} - -static struct kgdb_io kgdb_pdc_io_ops = { - .name = "kgdb_pdc", - .read_char = pdc_iodc_getc, - .write_char = kgdb_pdc_write_char, -}; - -static int __init kgdb_pdc_init(void) - { - kgdb_register_io_module(&kgdb_pdc_io_ops); - return 0; -} -early_initcall(kgdb_pdc_init); -- GitLab From 7dc4dbfe750e1f18c511e73c8ed114da8de9ff85 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 17 Dec 2022 17:45:40 +0100 Subject: [PATCH 686/694] parisc: Drop locking in pdc console code No need to have specific locking for console I/O since the PDC functions provide their own locking.
Signed-off-by: Helge Deller Cc: # 6.1+ --- arch/parisc/kernel/pdc_cons.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 7d0989f523d03..cf3bf82323746 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -12,37 +12,27 @@ #include /* for PAGE0 */ #include /* for iodc_call() proto and friends */ -static DEFINE_SPINLOCK(pdc_console_lock); - static void pdc_console_write(struct console *co, const char *s, unsigned count) { int i = 0; - unsigned long flags; - spin_lock_irqsave(&pdc_console_lock, flags); do { i += pdc_iodc_print(s + i, count - i); } while (i < count); - spin_unlock_irqrestore(&pdc_console_lock, flags); } #ifdef CONFIG_KGDB static int kgdb_pdc_read_char(void) { - int c; - unsigned long flags; - - spin_lock_irqsave(&pdc_console_lock, flags); - c = pdc_iodc_getc(); - spin_unlock_irqrestore(&pdc_console_lock, flags); + int c = pdc_iodc_getc(); return (c <= 0) ? NO_POLL_CHAR : c; } static void kgdb_pdc_write_char(u8 chr) { - if (PAGE0->mem_cons.cl_class != CL_DUPLEX) - pdc_console_write(NULL, &chr, 1); + /* no need to print char as it's shown on standard console */ + /* pdc_iodc_print(&chr, 1); */ } static struct kgdb_io kgdb_pdc_io_ops = { -- GitLab From 4add395bc77f19e2e5cedbef4368ffdebc89b165 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 17 Dec 2022 18:38:03 +0100 Subject: [PATCH 687/694] parisc: Move pdc_result struct to firmware.c Signed-off-by: Helge Deller --- arch/parisc/kernel/firmware.c | 4 ++-- arch/parisc/kernel/real2.S | 17 ++--------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index a115315d88e69..b31775928bb02 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -74,8 +74,8 @@ static DEFINE_SPINLOCK(pdc_lock); #endif -extern unsigned long pdc_result[NUM_PDC_RESULT]; -extern unsigned long pdc_result2[NUM_PDC_RESULT]; +unsigned long pdc_result[NUM_PDC_RESULT] __aligned(8); +unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8); #ifdef CONFIG_64BIT #define WIDE_FIRMWARE 0x1 diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S index 2b16d8d6598f1..4dc12c4c09809 100644 --- a/arch/parisc/kernel/real2.S +++ b/arch/parisc/kernel/real2.S @@ -15,28 +15,15 @@ #include - - .section .bss - - .export pdc_result - .export pdc_result2 - .align 8 -pdc_result: - .block ASM_PDC_RESULT_SIZE -pdc_result2: - .block ASM_PDC_RESULT_SIZE - .export real_stack - .export real32_stack .export real64_stack - .align 64 + __PAGE_ALIGNED_BSS real_stack: -real32_stack: real64_stack: .block 8192 #define N_SAVED_REGS 9 - + .section .bss save_cr_space: .block REG_SZ * N_SAVED_REGS save_cr_end: -- GitLab From 9086e6017957c5cd6ea28d94b70e0d513d6b7800 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 17 Dec 2022 20:05:43 +0100 Subject: [PATCH 688/694] parisc: Add missing FORCE prerequisites in Makefile Fix those make warnings: arch/parisc/kernel/vdso32/Makefile:30: FORCE prerequisite is missing arch/parisc/kernel/vdso64/Makefile:30: FORCE prerequisite is missing Add the missing FORCE prerequisites for all build targets identified by "make help". 
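For context, the kbuild idiom involved looks roughly like the sketch below (a minimal illustration with simplified, made-up target names rather than the exact rules from the tree). if_changed only re-runs its command when a prerequisite is newer or the saved command line has changed, and it relies on the always-out-of-date FORCE target to get the recipe evaluated on every build; because FORCE then appears in $^, it also has to be filtered out of the actual link inputs, which is why the cmd_vdso32ld and cmd_vdso64ld commands in the patch below use $(filter-out FORCE, $^).

# minimal sketch of the if_changed/FORCE idiom (simplified names)
quiet_cmd_vdsold = VDSOLD  $@
      cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@

# FORCE is a phony target with no recipe, so it is always considered
# out of date; listing it as a prerequisite makes the rule run every
# time, letting if_changed compare the command line saved in the
# corresponding .cmd file before deciding whether to relink.
$(obj)/vdso.so: $(src)/vdso.lds $(obj-vdso) FORCE
	$(call if_changed,vdsold)

# list the target so kbuild reads back its saved .cmd file
targets += vdso.so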
Fixes: e1f86d7b4b2a5213 ("kbuild: warn if FORCE is missing for if_changed(_dep,_rule) and filechk") Signed-off-by: Helge Deller Cc: # 5.18+ --- arch/parisc/kernel/vdso32/Makefile | 4 ++-- arch/parisc/kernel/vdso64/Makefile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile index 85b1c6d261d12..4459a48d23033 100644 --- a/arch/parisc/kernel/vdso32/Makefile +++ b/arch/parisc/kernel/vdso32/Makefile @@ -26,7 +26,7 @@ $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE # Force dependency (incbin is bad) # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) +$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) FORCE $(call if_changed,vdso32ld) # assembly rules for the .S files @@ -38,7 +38,7 @@ $(obj-cvdso32): %.o: %.c FORCE # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile index a30f5ec5eb4bf..f3d6045793f4c 100644 --- a/arch/parisc/kernel/vdso64/Makefile +++ b/arch/parisc/kernel/vdso64/Makefile @@ -26,7 +26,7 @@ $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE # Force dependency (incbin is bad) # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) +$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) FORCE $(call if_changed,vdso64ld) # assembly rules for the .S files @@ -35,7 +35,7 @@ $(obj-vdso64): %.o: %.S FORCE # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< -- GitLab From 4934fbfb3ff09b8500f63d4624ed8b41647bb822 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 21 Oct 2022 10:03:52 +0200 Subject: [PATCH 689/694] parisc: Show MPE/iX model string at bootup Some (mostly 64-bit) machines allow running MPE/iX and report the MPE model string via firmware call. Enhance the pdc_model_sysmodel() function to report that model string. Note that some 32-bit machines like the B160L wrongly report success for the firmware call, so include a check to prevent showing wrong info.
Signed-off-by: Helge Deller --- arch/parisc/include/asm/pdc.h | 2 +- arch/parisc/kernel/firmware.c | 4 ++-- arch/parisc/kernel/processor.c | 9 +++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index fcbcf9a96c111..40793bef8429f 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -37,7 +37,7 @@ int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info, int pdc_system_map_find_addrs(struct pdc_system_map_addr_info *pdc_addr_info, long mod_index, long addr_index); int pdc_model_info(struct pdc_model *model); -int pdc_model_sysmodel(char *name); +int pdc_model_sysmodel(unsigned int os_id, char *name); int pdc_model_cpuid(unsigned long *cpu_id); int pdc_model_versions(unsigned long *versions, int id); int pdc_model_capabilities(unsigned long *capabilities); diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index b31775928bb02..4dfe1f49c5c8b 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -527,14 +527,14 @@ int pdc_model_info(struct pdc_model *model) * Using OS_ID_HPUX will return the equivalent of the 'modelname' command * on HP/UX. */ -int pdc_model_sysmodel(char *name) +int pdc_model_sysmodel(unsigned int os_id, char *name) { int retval; unsigned long flags; spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_SYSMODEL, __pa(pdc_result), - OS_ID_HPUX, __pa(name)); + os_id, __pa(name)); convert_to_wide(pdc_result); if (retval == PDC_OK) { diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index dddaaa6e7a825..ba07e760d3c76 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -272,10 +272,15 @@ void __init collect_boot_cpu_data(void) printk(KERN_INFO "capabilities 0x%lx\n", boot_cpu_data.pdc.capabilities); - if (pdc_model_sysmodel(boot_cpu_data.pdc.sys_model_name) == PDC_OK) - printk(KERN_INFO "model %s\n", + if (pdc_model_sysmodel(OS_ID_HPUX, boot_cpu_data.pdc.sys_model_name) == PDC_OK) + pr_info("HP-UX model name: %s\n", boot_cpu_data.pdc.sys_model_name); + serial_no[0] = 0; + if (pdc_model_sysmodel(OS_ID_MPEXL, serial_no) == PDC_OK && + serial_no[0]) + pr_info("MPE/iX model name: %s\n", serial_no); + dump_stack_set_arch_desc("%s", boot_cpu_data.pdc.sys_model_name); boot_cpu_data.hversion = boot_cpu_data.pdc.model.hversion; -- GitLab From 2b76cfe190305fe02d8120df64f2a1bb6a3d3889 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 10 Dec 2022 12:33:46 +0100 Subject: [PATCH 690/694] ARM: dts: spear: drop 0x from unit address By coding style, unit address should not start with 0x. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20221210113347.63939-1-krzysztof.kozlowski@linaro.org Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/spear300.dtsi | 2 +- arch/arm/boot/dts/spear310.dtsi | 2 +- arch/arm/boot/dts/spear320.dtsi | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi index b39bd5a226276..f1135e887f7b8 100644 --- a/arch/arm/boot/dts/spear300.dtsi +++ b/arch/arm/boot/dts/spear300.dtsi @@ -46,7 +46,7 @@ status = "disabled"; }; - shirq: interrupt-controller@0x50000000 { + shirq: interrupt-controller@50000000 { compatible = "st,spear300-shirq"; reg = <0x50000000 0x1000>; interrupts = <28>; diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi index 77570833d46b2..ce08d88209403 100644 --- a/arch/arm/boot/dts/spear310.dtsi +++ b/arch/arm/boot/dts/spear310.dtsi @@ -34,7 +34,7 @@ status = "disabled"; }; - shirq: interrupt-controller@0xb4000000 { + shirq: interrupt-controller@b4000000 { compatible = "st,spear310-shirq"; reg = <0xb4000000 0x1000>; interrupts = <28 29 30 1>; diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi index b12474446a487..56f141297ea35 100644 --- a/arch/arm/boot/dts/spear320.dtsi +++ b/arch/arm/boot/dts/spear320.dtsi @@ -49,7 +49,7 @@ status = "disabled"; }; - shirq: interrupt-controller@0xb3000000 { + shirq: interrupt-controller@b3000000 { compatible = "st,spear320-shirq"; reg = <0xb3000000 0x1000>; interrupts = <30 28 29 1>; -- GitLab From 4c03c4188cfb831e4ac093599192aedd60625a45 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 14 Dec 2022 21:42:23 +0100 Subject: [PATCH 691/694] MAINTAINERS: add related dts to IXP4xx get_maintainer.pl currently gives no maintainer for the IXP4xx DTS files. Add them to the set of files handled by IXP4xx maintainers. Signed-off-by: Corentin Labbe Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20221005105734.3513581-1-clabbe@baylibre.com Link: https://lore.kernel.org/r/20221214204223.177807-1-linus.walleij@linaro.org Signed-off-by: Arnd Bergmann --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 886d3f69ee644..ea6c427e30879 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2266,6 +2266,7 @@ F: Documentation/devicetree/bindings/bus/intel,ixp4xx-expansion-bus-controller.y F: Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt F: Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml F: Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml +F: arch/arm/boot/dts/intel-ixp* F: arch/arm/mach-ixp4xx/ F: drivers/bus/intel-ixp4xx-eb.c F: drivers/clocksource/timer-ixp4xx.c -- GitLab From 4b88615950fc805690b92b46c8ab794beb4bd6aa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:25:14 +0100 Subject: [PATCH 692/694] ARM: pxa: fix building with clang The integrated assembler in clang does not understand the xscale specific mra/mar instructions: arch/arm/mach-pxa/pxa27x.c:136:15: error: unsupported architectural extension: xscale asm volatile(".arch_extension xscale\n\t" arch/arm/mach-pxa/pxa27x.c:136:40: error: invalid instruction, did you mean: mcr, mla, mrc, mrs, msr? mra r2, r3, acc0 Since these are coprocessor features, the same can be expressed using mrrc/mcrr, so use that for builds with IAS.
Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20221215162529.3659187-1-arnd@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/mach-pxa/pxa27x.c | 8 ++++++++ arch/arm/mach-pxa/pxa3xx.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index afbf6ace954fe..eea507fd5095f 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -133,8 +133,12 @@ void pxa27x_cpu_pm_enter(suspend_state_t state) #ifndef CONFIG_IWMMXT u64 acc0; +#ifndef CONFIG_AS_IS_LLVM asm volatile(".arch_extension xscale\n\t" "mra %Q0, %R0, acc0" : "=r" (acc0)); +#else + asm volatile("mrrc p0, 0, %Q0, %R0, c0" : "=r" (acc0)); +#endif #endif /* ensure voltage-change sequencer not initiated, which hangs */ @@ -153,8 +157,12 @@ void pxa27x_cpu_pm_enter(suspend_state_t state) case PM_SUSPEND_MEM: cpu_suspend(pwrmode, pxa27x_finish_suspend); #ifndef CONFIG_IWMMXT +#ifndef CONFIG_AS_IS_LLVM asm volatile(".arch_extension xscale\n\t" "mar acc0, %Q0, %R0" : "=r" (acc0)); +#else + asm volatile("mcrr p0, 0, %Q0, %R0, c0" :: "r" (acc0)); +#endif #endif break; } diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 979642aa7ffe0..b26f00fc75d54 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -108,8 +108,12 @@ static void pxa3xx_cpu_pm_suspend(void) #ifndef CONFIG_IWMMXT u64 acc0; +#ifdef CONFIG_CC_IS_GCC asm volatile(".arch_extension xscale\n\t" "mra %Q0, %R0, acc0" : "=r" (acc0)); +#else + asm volatile("mrrc p0, 0, %Q0, %R0, c0" : "=r" (acc0)); +#endif #endif /* resuming from D2 requires the HSIO2/BOOT/TPM clocks enabled */ @@ -137,8 +141,12 @@ static void pxa3xx_cpu_pm_suspend(void) AD3ER = 0; #ifndef CONFIG_IWMMXT +#ifndef CONFIG_AS_IS_LLVM asm volatile(".arch_extension xscale\n\t" "mar acc0, %Q0, %R0" : "=r" (acc0)); +#else + asm volatile("mcrr p0, 0, %Q0, %R0, c0" :: "r" (acc0)); +#endif #endif } -- GitLab From 6a7ee50f8f56dc181e1150cc101896053b02d220 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:26:20 +0100 Subject: [PATCH 693/694] ARM: disallow pre-ARMv5 builds with ld.lld lld cannot build for ARMv4/v4T targets because it inserts 'blx' instructions that are unsupported there: ld.lld: warning: lld uses blx instruction, no object with architecture supporting feature detected Add a Kconfig time dependency to prevent those targets from being selected in randconfig builds. 
Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Link: https://github.com/llvm/llvm-project/issues/50764 Link: https://github.com/ClangBuiltLinux/linux/issues/964 Link: https://lore.kernel.org/r/20221215162635.3750763-1-arnd@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a08c9d092a332..349af93213342 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -342,12 +342,14 @@ comment "CPU Core family selection" config ARCH_MULTI_V4 bool "ARMv4 based platforms (FA526, StrongARM)" depends on !ARCH_MULTI_V6_V7 + depends on !LD_IS_LLD select ARCH_MULTI_V4_V5 select CPU_FA526 if !(CPU_SA110 || CPU_SA1100) config ARCH_MULTI_V4T bool "ARMv4T based platforms (ARM720T, ARM920T, ...)" depends on !ARCH_MULTI_V6_V7 + depends on !LD_IS_LLD select ARCH_MULTI_V4_V5 select CPU_ARM920T if !(CPU_ARM7TDMI || CPU_ARM720T || \ CPU_ARM740T || CPU_ARM9TDMI || CPU_ARM922T || \ -- GitLab From ba4b4d0293ed12ec2eda09e0329d9831243ca699 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:53:21 +0100 Subject: [PATCH 694/694] soc: tegra: fix CPU_BIG_ENDIAN dependencies My previous patch to prevent BPMP from being enabled on big endian kernels caused a build regression: WARNING: unmet direct dependencies detected for TEGRA_BPMP Depends on [n]: ARCH_TEGRA [=y] && TEGRA_HSP_MBOX [=y] && TEGRA_IVC [=y] && !CPU_BIG_ENDIAN [=y] Selected by [y]: - ARCH_TEGRA_186_SOC [=y] && ARCH_TEGRA [=y] && ARM64 [=y] - ARCH_TEGRA_194_SOC [=y] && ARCH_TEGRA [=y] && ARM64 [=y] - ARCH_TEGRA_234_SOC [=y] && ARCH_TEGRA [=y] && ARM64 [=y] Add even more such dependencies for the SoC types that use the BPMP driver. Fixes: 4ddb1bf1a837 ("tegra: mark BPMP driver as little-endian only") Signed-off-by: Arnd Bergmann Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20221215165336.1781080-1-arnd@kernel.org Signed-off-by: Arnd Bergmann --- drivers/soc/tegra/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig index d1ecadffa1bba..ba5a81fc813c6 100644 --- a/drivers/soc/tegra/Kconfig +++ b/drivers/soc/tegra/Kconfig @@ -95,6 +95,7 @@ config ARCH_TEGRA_210_SOC config ARCH_TEGRA_186_SOC bool "NVIDIA Tegra186 SoC" + depends on !CPU_BIG_ENDIAN select MAILBOX select TEGRA_BPMP select TEGRA_HSP_MBOX @@ -110,6 +111,7 @@ config ARCH_TEGRA_186_SOC config ARCH_TEGRA_194_SOC bool "NVIDIA Tegra194 SoC" + depends on !CPU_BIG_ENDIAN select MAILBOX select PINCTRL_TEGRA194 select TEGRA_BPMP @@ -121,6 +123,7 @@ config ARCH_TEGRA_194_SOC config ARCH_TEGRA_234_SOC bool "NVIDIA Tegra234 SoC" + depends on !CPU_BIG_ENDIAN select MAILBOX select TEGRA_BPMP select TEGRA_HSP_MBOX -- GitLab